/* rule based OCR engine, partly rewritten for edges (old=pixel) */
/*
This is an Optical-Character-Recognition program
Copyright (C) 2000-2018 Joerg Schulenburg

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

 see README for email address

  >>> DO NOT EDIT THIS FILE IF YOU DO NOT REALLY KNOW WHAT YOU ARE DOING! <<<

  I have invested a lot of time to write this part of the program.
  This engine should recognize chars always right or return UNKNOWN.
  If you change something, test all other example files too,
  to be sure that all things work better. (JoergS)

  This engine was pixel-based until 0.40, which was not successful enough.
  Also, code changes always had side effects. The vectorisation of the code
  starts from version 0.41 with the chars XNz and seems to be much easier
  to handle. Vectorization means we frame each character by a chain of
  vectors and don't care about pixels anymore. Unfortunately I have to
  replace all the pixel codes, which is a long process. Old code will be lost.
(JorgS) ToDo: - if box1->p and b differ, reduce probability - probability makes life much easier here - use only one box!?, may be bits have usefull infos - divide this file, suggestion: classify chars: high=ABCDEFGHIJKLMNOPQRSTUVWXYZbdfhklt, low=acegijmnopqrsuvwxyz or often_used=etianmsurwdkgo rarely_used=hvjcflpqxyz.,: or every char (large overhead) - two-pass version (first pass without tolerance) 2nd pass with tolerance (ex: one tiny more in sdata->holes) general feature extraction: - white holes at middle, upper, lower position (cost much time) - test lines and triangles insteat of rectangles char is removed, wchar_t is used (better code) making a static global variable-set x.x0,x.x1, and call test_a, test_b ... (faster compilation, but not reentrant!) - adding slant-angle (if detected) to distinguish between l and / ? - ac (alternate chars) as string add_ac(box1,"/") => box1->ac="Il/"; for better context correction or output: "Ha[lI][lI]o!" */ #include #include // #include "pgm2asc.h" #include "ocr0.h" // #include "ocr1.h" #include "amiga.h" #include "pnm.h" #include "gocr.h" #include "unicode_defs.h" #include "ocr0_dbg.h" /* define DO_DEBUG IFV MM Setac Break MSG DBG */ /* extern "C"{ */ // static inline int sq(int x) { return x*x; } /* square */ /* * go from vector j1 to vector j2 and measure maximum deviation of * the steps from the line connecting j1 and j2 * return the squared maximum distance * in units of the box size times 1024 * ToDo: 1) better give back max-dx and max-dy ??? * errors if j1 and j2 are in different frames or belong to * more then one frame? * 2) Better get deviation from a complete vector graphic? * The vectorgraphic is the ideal test char adapted to the * extrem vertices of the real char. 
*/ int line_deviation( struct box *box1, int j1, int j2 ) { int r1x, r1y, r2x, r2y, r3x, r3y, i, x, y, d, dist, maxdist=0, frame, l2; r1x=box1->frame_vector[j1][0]; r1y=box1->frame_vector[j1][1]; r2x=box1->frame_vector[j2][0]; r2y=box1->frame_vector[j2][1]; if (!box1->num_frames) return(-1); if (j1<0 || j1>box1->num_frame_vectors[box1->num_frames-1] || j2<0 || j2>box1->num_frame_vectors[box1->num_frames-1]) { fprintf(stderr,"Error in "__FILE__" L%d: idx out of range",__LINE__); return(-1); } /* get the frame the endvector belongs to */ for (i=0;inum_frames;i++) if (j2num_frame_vectors[i]) break; frame=i; /* frame(j1)<=frame(j2) possible */ if (j1!=j2) // 2017-03 start j1+1 and j1!=j2 added for (i=j1+1;;i++) { // do it for each vector between j1 and j2 if (i >= box1->num_frame_vectors[frame]) i=((frame)?box1->num_frame_vectors[frame-1]:0); /* go around */ if (i==j2) break; // for (i=j1;i!=j2;i=(i+1)%box1->num_frame_vectors[0]) {~} r3x=box1->frame_vector[i][0]; r3y=box1->frame_vector[i][1]; // Language=german // german: Abstand Punkt von Strecke, Laenge Lotrechte // germ.Strecke : l1=(r1+r2)/2+d*(r2-r1)/2 for d=-1..1 // germ.Lotrechte: l2=r3+b*[-(r2-r1).y,(r2-r1).x] // Schnittpunkt : l1=l2, // eq1x: (r1x+r2x)/2-r3x+d*(r2x-r1x)/2+b*(r2y-r1y)=0 // eq1y: (r1y+r2y)/2-r3y+d*(r2y-r1y)/2-b*(r2x-r1x)=0 // eq2x: b*(r2x-r1x)*(r2y-r1y)=-((r1x+r2x)/2-r3x+d*(r2x-r1x)/2)*(r2x-r1x) // eq2y: b*(r2x-r1x)*(r2y-r1y)= ((r1y+r2y)/2-r3y+d*(r2y-r1y)/2)*(r2y-r1y) // eq2y-eq2x: ... in units of 1024 (fast integer rounded correctly) l2=sq(r2x-r1x)+sq(r2y-r1y); // square of distance r2-r1 if (l2==0) { // fprintf(stderr,"ocr0 L%d: r1==r2 r1= %d %d",__LINE__, r1x, r1y); // debugging d=-1024; } else d=-( ((r1x+r2x)-2*r3x)*(r2x-r1x) +((r1y+r2y)-2*r3y)*(r2y-r1y))*1024/l2; // ..-1024..+1024.. // d is rel. 
position on j1-j2 line -1024=j1 0=center +1024=j2 if (d<=-1024) { x=r1x*1024; y=r1y*1024; } // starting point else { if (d>=1024) { x=r2x*1024; y=r2y*1024; } // end point else { x=(r1x+r2x+1)*1024/2+(d*(r2x-r1x))/2; // 1024 units y=(r1y+r2y+1)*1024/2+(d*(r2y-r1y))/2; /* we have the crossing point x,y now */ } } dist=sq((x-r3x*1024)/(box1->x1-box1->x0+1+4)) // 2017-03 +2 (small fonts) +sq((y-r3y*1024)/(box1->y1-box1->y0+1+4)); // 0..2*sq(1024) // d is rel. position on j1-j2 line -1024=j1 0=center +1024=j2 // j1 i j2 x[i] y[i] DBG( IFV fprintf(stderr,"\nDBG deviation j1-j2 %2d %2d %2d d %5.2f xy %3d %3d %4.1f %4.1f dist %5d", j1,i,j2, d/1024., r3x-box1->x0,r3y-box1->y0,x/1024.-r3x,y/1024.-r3y,dist);) if (dist>maxdist) maxdist=dist; // for debugging: // fprintf(stderr,"\nDBG dev: %d-%d-%d dist=%5d max=%5d d=%d %d,%d-%d,%d" // " vector= %d %d crosspoint= %d %d ", // j1,i,j2,dist,maxdist,d,r1x,r1y,r2x,r2y,r3x,r3y,x,y); } // loop i=j1..j2 return maxdist; } // line_deviation /* * search vectors between j1 and j2 for nearest point a to point r * example: * * r-> $$...$$ $ - mark vectors * @@$..@@ @ - black pixels * @@$..@@ . - white pixels * @@@@.$@ * a-> @@$@$@@ * @$.@@@@ * @@..$@@ * @@..$@@ * j1 --> $$...$$ <-- j2 * * ToDo: vector aa[5] = {rx,ry,x,y,d^2,idx} statt rx,ry? * j1 and j2 must be in the same frame * return aa? 
* 2009-07: * - change from normalized (dx=128,dy=128) to absolute distance * - simpler and no squeeze effect (problem getting right i2 for "3") * 2018-09: * thin fonts may have inner and outer vector nearest to rx,ry, 5x8.y * */ int nearest_frame_vector( struct box *box1, int j1, int j2, int rx, int ry) { int x,y,d,i,aa[4]; /* x,y,normalized_distance^2,vector_index */ int frame=0; // int x0=box1->x0, y0=box1->y0, // x1=box1->x1, y1=box1->y1; // int dx=box1->x1-x0+1, dy=box1->y1-y0+1; // 2017-03 fix j1,j2 >= max (old: j1,j2 > max) if (!box1->num_frames) return(-1); if (j1<0 || j1>=box1->num_frame_vectors[box1->num_frames-1] || j2<0 || j2>=box1->num_frame_vectors[box1->num_frames-1]) { fprintf(stderr,"Error in "__FILE__" L%d: idx %d-%d out of range\n",__LINE__,j1,j2); out_x(box1); return(-1); } aa[0]=x=box1->frame_vector[j2][0]; /* x */ aa[1]=y=box1->frame_vector[j2][1]; /* y */ /* maximum is (distance*128)^2 if r is inside the box */ // aa[2]=d=2*sq(128)+sq((rx-(x0+x1)/2)*128/dx)+sq((ry-(y0+y1)/2)*128/dy); aa[2]=d=2*(sq(x-rx)+sq(y-ry)); /* must be greater than min. dist, Jul09 */ aa[3]=j2; /* vector index */ /* get the frame the endvector belongs to */ for (i=0;inum_frames;i++) if (j2num_frame_vectors[i]) break; frame=i; /* frame(j1)<=frame(j2) possible */ for (i=j1;;i++) { if (i >= box1->num_frame_vectors[frame]) i=((frame)?box1->num_frame_vectors[frame-1]:0); /* go around */ x=box1->frame_vector[i][0]; /* take a vector */ y=box1->frame_vector[i][1]; /* distance to upper left end, normalized to 128 */ // d=sq((x-rx)*128/dx)+sq((y-ry)*128/dy); // old 2009-07 d=sq(x-rx)+sq(y-ry); if (d0 and m==1 box1 is changed // m>0 modify box1->dots // m==2 modify box1->y0 // called by pgm2asc + ocr0(?) // ToDo: because we do modifications here, call it with a copy!? 
// modification can have undesired side effects else // dont remove upper dot from ":" 2010-09-30 int testumlaut(struct box *box1, int cs, int m, wchar_t *modifier){ // pix p=*(box1->p); int r,y,x,x0,x1,y0,y1,dx,dy,m1,m2,m3, xl,xr,yu,yl; // left, right, upper and lower border of dots wchar_t mod='\0'; /* (TeX-) modifier ~"'` for compose() */ DBG( wchar_t c_ask='"'; ) if (box1->num_frames<1) return 0; if (box1->num_frames==2) { if (box1->y0>box1->m1 && abs(box1->frame_vol[0] -box1->frame_vol[1]) <=abs(box1->frame_vol[0] +box1->frame_vol[1])/8) return 0; // ":" 2010-09-30 } r=0; x0=box1->x0; x1=box1->x1; dx=x1-x0+1; y0=box1->y0; y1=box1->y1; dy=y1-y0+1; m1=box1->m1; m2=box1->m2; m3=box1->m3; xl=x0; xr=x1; yu=yl=y0; if (dy < 5 || 4*y0 > 3*m2+m3) return 0; // no low chars: .,-= if (y0 >= m2 && y1 >= m3) return 0; // 2010-10-08 ocr-a + ocr-b + qemu ';' /* modifier in box included? */ if( 2*y1 > m1+m2 ){ /* modifier in box included? */ for(y=y0;2*yp,cs,1)==0 ) break; if( 2*y extract */ yl=y; while( get_bw(xl,xr,y,y,box1->p,cs,1)==0 && 2*y<=y0+y1) y++; // 2010-09-24 this was bad code, destroying earlier good work for ":" // if( m&2 ) box1->y0=y; /* set new upper bond */ } } if( yu>=yl ) { if(m) box1->dots=0; return 0; } /* nothing found */ if( get_bw(xl-1,xl-1,yu,yl-1,box1->p,cs,1)==1 ) // neighbour overlap? while( get_bw(xl ,xl ,yu,yl-1,box1->p,cs,1)==1 && 2*xlp,cs,1)==1 ) break; for(;xr>xl;xr--)if( get_bw(xr,xr,yu,yl,box1->p,cs,1)==1 ) break; if ( yl-1>yu ) { // tall box ij"a"o"u #if 0 // temporary set new y0 (not needed!) x=box1->y0; box1->y0=m1; out_x(box1); box1->y0=x; fprintf(stderr,"\n#testumlaut x= %d %d m1=%d m2=%d",x0,y0,m1-y0,m2-y0); fprintf(stderr," yu=%d yl=%d xl=%d xr=%d",yu-y0,yl-y0,xl-x0,xr-x0); #define DEBUG 1 #endif { x=xl;y=yu; if( get_bw(xl,x1+1,yu,yl-1,box1->p,cs,1)==0 ) r=0; // neighbour overlap? 
else if( get_bw(xl ,xl ,yu,yl-1,box1->p,cs,1)==0 || get_bw(xl-1,xl-1,yu,yl-1,box1->p,cs,1)==0 ) // be sure there are gap to neighbours if( get_bw(xr ,xr ,yu,yl-1,box1->p,cs,1)==0 || get_bw(xr+1,xr+1,yu,yl-1,box1->p,cs,1)==0 ) { int i,j,x; r=1; // ...@@@.... RING_ABOVE // ..@@@..@@. TILDE // ..@...@... // @@.@@@@@.. // ..@...@... // @......... // ..@..@@... // ...@@@.... for (i=yu;ip,cs,1)==1) break; for ( ;ip,cs,1)==0) break; for (j=xl;jp,cs,1)==1) break; for ( ;jp,cs,1)==0) break; for ( x=j;xp,cs,1)==1) break; // vert. gap detected if( j2 && num_obj(xl,xr,yu,yl-1,box1->p,cs)>=2 // not best!!! && num_cross(xl,xr,yu +(yl-yu)/4,yu+ (yl-yu)/4,box1->p,cs) == 2 && num_cross(xl,xr,yl-1-(yl-yu)/2,yl-1-(yl-yu)/2,box1->p,cs) == 2 ){ // may be the following lines are not quite ok while( get_bw(xl,xr,yl,yl,box1->p,cs,1)==0 && 2*yly0!=yl) { MSG(fprintf(stderr,"set new upper bound y0+= %+3d",yl-box1->y0);) box1->y0=yl; } /* if( m&2 ) box1->y0= ( (r==1) ? yu : yl ); */ // out_x(box1); } if (r==0){ // divided fr != fi while( get_bw(x0,x1,yu,yu,box1->p,cs,1)==0 && 2*yuy0=yu; } if( r==1 ){ yl--; // .@@@. ..@@. // .@@.. .@@.. // .@... .@@.. // // if( loop(box1->p,xl,yu,xr-xl,cs,0,RI) // > loop(box1->p,xl,yl,xr-xl,cs,0,RI) // +dx/8 // && loop(box1->p,xr,yu,xr-xl,cs,0,LE) // < loop(box1->p,xr,yl,xr-xl,cs,0,LE)) // -dx/8 ) // é Nov03 if( loop(box1->p,xl,yu,xr-xl,cs,0,RI) - loop(box1->p,xr,yu,xr-xl,cs,0,LE) > loop(box1->p,xl,yl,xr-xl,cs,0,RI) // +dx/8 - loop(box1->p,xr,yl,xr-xl,cs,0,LE)+1) // -dx/8 ) // é Nov03 mod = ACUTE_ACCENT; // ' if( xr-xl+1 > 3*(yl-yu+1) && get_bw(xl,xr,yu,yl,box1->p,cs,2)==0 ) mod = MACRON; // "-" above // .@@@. .@@.. // ..@@. ..@@. // ...@. ..@@. 
// // if( loop(box1->p,xl,yu,xr-xl,cs,0,RI) // < loop(box1->p,xl,yl,xr-xl,cs,0,RI) // -dx/8 // && loop(box1->p,xr,yu,xr-xl,cs,0,LE) // > loop(box1->p,xr,yl,xr-xl,cs,0,LE) ) // +dx/8 ) à Nov03 if( loop(box1->p,xl,yu,xr-xl,cs,0,RI) - loop(box1->p,xr,yu,xr-xl,cs,0,LE) < loop(box1->p,xl,yl,xr-xl,cs,0,RI) // -dx/8 - loop(box1->p,xr,yl,xr-xl,cs,0,LE) -1 ) // +dx/8 ) à Nov03 mod = GRAVE_ACCENT; // `` #ifdef DEBUG fprintf(stderr,"\n#testumlaut x= %d %d m1=%d m2=%d",x0,y0,m1-y0,m2-y0); fprintf(stderr," yu=%d yl=%d xl=%d xr=%d",yu-y0,yl-y0,xl-x0,xr-x0); #endif if( (xr-xl+1) < 2*(yl-yu+1)+2 && 2*(xr-xl+1)+2 > (yl-yu+1) ) { int i,i1,i2,i3,i4; i1=loop(box1->p,xl ,(yu+yl)/2,xr-xl+1,cs,0,RI); i1=loop(box1->p,xl+i1,(yu+yl)/2,xr-xl+1,cs,1,RI); i2=loop(box1->p,(xl+xr)/2,yu ,yl-yu+1,cs,0,DO); i2=loop(box1->p,(xl+xr)/2,yu+i2,yl-yu+1,cs,1,DO); for (i=0;ip,xl+i,yu+i)< cs) break; i3=i; for ( ;ip,xl+i,yu+i)>=cs) break; i3=i-i3; for (i=0;ip,xr-i,yu+i)< cs) break; i4=i; for ( ;ip,xr-i,yu+i)>=cs) break; i4=i-i4; #ifdef DEBUG fprintf(stderr,"\n#DEBUG DOT_ABOVE %d %d %d %d",i1,i2,i3,i4); #endif if ( (xr-xl<5 && yl-yu<8) /* to small */ || (i1>=(xr-xl+1)/2+2 && i2>=(yl-yu+1)/2+2 /* symmetrical */ && abs(i3-i4)<=i1/4+2 && abs(i1-i2)<=i1/4+2 && abs(i3-i1)<=i1/4+4 && abs(i4-i2)<=i1/4+4) ) mod = DOT_ABOVE; // "." above "ij", not ":;", ToDo: improve it! 
} if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI) > loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/8 || loop(box1->p,xl,yu ,xr-xl,cs,0,RI) > loop(box1->p,xl,yl-1,xr-xl,cs,0,RI)-dx/8 ) && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE) > loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/8 || loop(box1->p,xr,yu ,xr-xl,cs,0,LE) > loop(box1->p,xr,yl-1,xr-xl,cs,0,LE)-dx/8 ) && num_cross(xl,xr,yu ,yu ,box1->p,cs) == 1 && ( num_cross(xl,xr,yl ,yl ,box1->p,cs) == 2 || num_cross(xl,xr,yl-1,yl-1,box1->p,cs) == 2 )) mod = CIRCUMFLEX_ACCENT; // "^" if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI) < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 || loop(box1->p,xl,yu+1,xr-xl,cs,0,RI) < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 ) && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE) < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 || loop(box1->p,xr,yu+1,xr-xl,cs,0,LE) < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 ) && ( num_cross(xl,xr,yu ,yu ,box1->p,cs) == 2 || num_cross(xl,xr,yu+1,yu+1,box1->p,cs) == 2 ) && num_cross(xl,xr,yl ,yl ,box1->p,cs) == 1 ) mod = CARON; // "v" above if( /* test for bow (new0.3.6) */ loop(box1->p,xl,yu ,xr-xl,cs,0,RI) + loop(box1->p,xl,yl ,xr-xl,cs,0,RI) - 2*loop(box1->p,xl,(yl+yu)/2,xr-xl,cs,0,RI) > dx/16+1 && xr-xl>10) if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI) < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 || loop(box1->p,xl,yu+1,xr-xl,cs,0,RI) < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 ) && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE) < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 || loop(box1->p,xr,yu+1,xr-xl,cs,0,LE) < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 ) && ( num_cross(xl,xr,yu ,yu ,box1->p,cs) == 2 || num_cross(xl,xr,yu+1,yu+1,box1->p,cs) == 2 ) && num_cross(xl,xr,yl ,yl ,box1->p,cs) == 1 ) mod = BREVE; // round "u" above if( xr-xl>3 && yl-yu>1 ) if( loop(box1->p,xl,yu,xr-xl,cs,0,RI) > loop(box1->p,xl,yl,xr-xl,cs,0,RI) && loop(box1->p,xr,yu,xr-xl,cs,0,LE) < loop(box1->p,xr,yl,xr-xl,cs,0,LE) && num_cross(xl,xr,yu,yu,box1->p,cs) == 2 && num_cross(xl,xr,yl,yl,box1->p,cs) == 2 ) mod = TILDE; if( xr-xl>2 && yl-yu>2) if( 
num_cross(xl,xr,(yu+yl)/2,(yu+yl)/2,box1->p,cs) >1 ) if( num_cross((xl+xr)/2,(xl+xr)/2,yu,yl,box1->p,cs) >1 ) if( num_hole(xl,xr,yu,yl,box1->p,cs,NULL) == 1 ) // if (sdata->holes.num) ... not in this range? mod = RING_ABOVE; #ifdef DEBUG printf("\n#DEBUG umlaut mod=0x%04x x=%d..%d y=%d..%d r=%d %s", (int)mod,yu-box1->y0,yl-box1->y0, xl-box1->x0,xr-box1->x0,r,((mod==CARON)?"CARON": ((mod==ACUTE_ACCENT)?"ACUTE": ((mod==TILDE)?"TILDE":"?")))); out_x(box1); #endif } } if (m) box1->dots=r; // set to 0 also possible after division if (m) box1->modifier=mod; /* should be resetted after compose ??? */ MSG(fprintf(stderr,"testumlaut mod=%s dots=%d y0+%d m=%d nac=%d", decode(mod,ASCII),r,box1->y0-y0,m,box1->num_ac);) } // printf(" modifier=%c",mod); if (modifier) *modifier=mod; /* set modifier */ return r; } static wchar_t ocr0_eE(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; int i,i1,i2,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,bad_e=0, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */ // --- most frequent letter e first!!! // new vector based fat e variant withot holes 2010-10-10 // --- test e --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 (smallest seen is 5x6) DBG( wchar_t c_ask='e'; ) // if (sdata->holes.num > 0) break; if (box1->num_frames != 1) break; /* 7x7 .@@@@@.<- ..0@@$. @@@@@@@ .@@@@@3 @@@@@@@ $@@@@@@ <- min_gray=101 b=0 w=233 (ToDo17: check mingray) @@@@@@@ <- 2 @@$@@@$ @@..... <- 1 1$ @@@@@@@ .@$@@@$ .@@@@@@<- ..$@@@2 see tmp13/sslmozFP_Fi.png 8x9-font a0.x>dx/3? 
*/ if (aa[0][0]>x0+dx/3 || aa[0][1]>y0+dy/4) Break; if (aa[1][0]>x0+dx/3 || aa[1][1]y0+dy/4) Break; // upper body must at least 2 times thicker than low line i= loop(box1->p,x0+dx/2,y0 ,y1-y0,cs,0,DO); if (i>dy/8) Break; i= loop(box1->p,x0+dx/2,y0+i,y1-y0,cs,1,DO); j= loop(box1->p,x0+dx/2,y1 ,y1-y0,cs,0,UP); if (j>dy/8) Break; j= loop(box1->p,x0+dx/2,y1-j,y1-y0,cs,1,UP); if (j>dy/3) Break; if (i<2*j-dy/16) Break; // leftmost gap from the right i1=nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y0+2*dy/3); if (box1->frame_vector[i1][0]>=x0+(dx+2)/3) Break; // 2017-03 tmp15/ssl if (box1->frame_vector[i1][1]<=y0+dy/2) Break; // strong for bad e i2=nearest_frame_vector(box1,i1 ,aa[3][3], x1, y0+2*dy/3); if (box1->frame_vector[i2][0]< x1-dx/8-1) Break; if (box1->frame_vector[i2][1]< y0+dy/2-1) Break; // strong for bad e if (box1->m2) { if (sdata->gchar) ad=98*ad/100; if (sdata->hchar) ad=98*ad/100; } else ad=99*ad/100; Setac(box1,(wchar_t)'e',ad); if (ad>=100) return 'e'; break; } // old-pixel based variant // --- test e --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 (smallest seen is 5x6) DBG( wchar_t c_ask='e'; ) // if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ // if (sdata->holes.num != 1) ad=97*ad/100; if (box1->num_frames != 1) ad=97*ad/100; // excludes tiny holes 1810.rnd80 if (box1->num_frames > 2) Break; // excludes tiny holes 1810.rnd80 /* ToDo: may be a two pass version intolerant/tolerant is better */ if( loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI)>dx/3 ) Break; // rough test if( loop(box1->p,x0+dx/2,y0,y1-y0,cs,0,DO)>dy/3 ) Break; if( loop(box1->p,x0+dx/2,y1,y1-y0,cs,0,UP)>dy/3 ) Break; if( num_cross(x0,x1,y0+dy/4 ,y0+dy/4 ,box1->p,cs) > 2 && num_cross(x0,x1,y0+dy/4+1,y0+dy/4+1,box1->p,cs) > 2 ) Break; // gt x=(x0+x1)/2;i= num_cross(x,x,y0,y1,box1->p,cs); // v0.40 if (i!=3) { x=(x0+2*x1)/3;i= num_cross(x,x,y0,y1,box1->p,cs); } if (i!=3) { x=(x0+3*x1)/4;i= num_cross(x,x,y0,y1,box1->p,cs); } if 
(i!=3) { i= num_cross((x0+2*x1)/3,(x0+x1)/2,y0,y1,box1->p,cs); } i=loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI); if( i>dx/2 ) Break; j=loop(box1->p,x0,y0 ,x1-x0,cs,0,RI); if( jp,x0,y1 ,x1-x0,cs,0,RI); if( jp,x0+dx/2,y0,y1-y0,cs,0,DO); if( i>dx/2 ) Break; j=loop(box1->p,x1-dx/3,y0,y1-y0,cs,0,DO); if( jp,x0 ,y0,y1-y0,cs,0,DO); if( jp,x1 ,y0,y1-y0,cs,0,DO); if( jp,x0+dx/2,y1,y1-y0,cs,0,UP); if( i>dx/2 ) Break; j=loop(box1->p,x0 ,y1,y1-y0,cs,0,UP); if( jp,x1 ,y1,y1-y0,cs,0,UP); if( jp,x0, (y0+y1)/2,x1-x0,cs,0,RI) -loop(box1->p,x0,(3*y0+y1)/4,x1-x0,cs,0,RI) -loop(box1->p,x0,(y0+3*y1)/4,x1-x0,cs,0,RI); if (dx>3 && j>=dx/4) Break; // ~g 4x6font for(y=1;yp,cs) == 2 ) break; if( y==dy/2 ) Break; // v0.2.5 ~ bad_t for(i=0,j=x0+dx/4;j<=x1-dx/4 && i<=dx/4;j++) if( num_cross(j,j,y0,y1,box1->p,cs) == 3 ) i++; if( dx>4 && dy>5 && (i set x,y for(x=0,y=i=y0+dy/3;ip,x1,i,y1-y0,cs,0,LE); if(j>=x) { x=j;y=i; } } if (x 2* smallest thickness right for(i1=dx,i=y0+dy/3;ip,x0 ,i,y1-y0,cs,0,RI); if (j>dx/2) break; j =loop(box1->p,x0+j,i,y1-y0,cs,1,RI); if (jp,x1 ,i,y1-y0,cs,0,LE); j =loop(box1->p,x1-j,i,y1-y0,cs,1,LE); if(j2*i1) Break; // not accepted, if right line is not very thinn x =loop(box1->p,x1 ,y,y1-y0,cs,0,LE); x+=loop(box1->p,x1-x,y,y1-y0,cs,1,LE); x+=loop(box1->p,x1-x,y,y1-y0,cs,0,LE); if (3*i2>i1) ad=99*ad/100; if (2*i2>i1) ad=99*ad/100; bad_e=60; // used later? } if (xp,cs) > 1 ) i=0; if( i ) Break; // ..@@@@...<- // .@@@@@@;. // @@,...@@. // @@.....@, // @@@@@@@@@ // @@.,;.@,. <- problem (y) == bad_e>50 // @@.....@. // @@,...@@. // .@@@,@@@. 
// ..@@@@;..<- if (dy>11 && bad_e<50) if ( num_cross(x0,x1,y,y,box1->p,cs) != 1 ) Break; // except "geschwungenem e" if ( num_cross(x0,x1-dx/3,y ,y ,box1->p,cs) != 1 && num_cross(x0,x1-dx/3,y+1,y+1,box1->p,cs) != 1 ) Break; // if( num_hole(x0, x1, y0 , y ,box1->p,cs,NULL) < 1 ){ if( sdata->holes.num == 0 || sdata->holes.hole[0].y1 >= y-y0){ if( sdata->hchar ) Break; // ~ \it t // look if thinn font (may be h-line is broken) Mai00 for(j=0,i=x0+dx/8;ip,cs,1) == 1 ) j++; if(j<2*dx/4) Break; } if( sdata->holes.num>0 && sdata->holes.hole[0].y0 > y-y0) Break; if( sdata->holes.num>1 && sdata->holes.hole[1].y0 > y-y0) Break; if( sdata->holes.num==1 && sdata->holes.hole[0].x0 >= dx/2) { ad=95*ad/100; } /* 8*10 @ (=at) is not an e */ // look for horizontal gap for(x=0,y=i=y0+dy/4;ip,x0,i,x1-x0,cs,0,RI); if(j>=x) { x=j;y=i; } } if (y>y0+dy/4 && ydx/2) Break; // s if (x>dx/4) ad=99*ad/100; if( num_cross(x0+dx/2,x1 ,y1-dy/4,y1 ,box1->p,cs) == 0 && num_cross(x0+dx/2,x1-1,y1-dy/4,y1 ,box1->p,cs) == 0 && num_cross(x0+dx/2,x1 ,y1-dy/4,y1-1,box1->p,cs) == 0 ) { if (sdata->gchar) Break; // ~p ad=99*ad/100; } /* upper case is for 5x6 box */ if( sdata->hchar // broken B ? 
should also work when linedetection fails && loop(box1->p,x1,y1-dy/3,dx,cs,0,LE)<=dx/8 ) { x = loop(box1->p,x0,y0+dy/2,dx,cs,0,RI); if( loop(box1->p,x0,y0+dy/4,dx,cs,0,RI)<=x && loop(box1->p,x0,y0+dy/8,dx,cs,0,RI)<=x ) Break; if( loop(box1->p,x0,y1-dy/4,dx,cs,0,RI)<=x && loop(box1->p,x0,y1-dy/8,dx,cs,0,RI)<=x ) Break; } x = loop(sdata->bp,0,dy-2 ,dx,cs,0,RI); if( loop(sdata->bp,0,dy-1-dy/8,dx,cs,0,RI)>x && dy>16) Break; // some Q if (box1->m2) { if (sdata->gchar) ad=99*ad/100; if (sdata->hchar) ad=99*ad/100; } else ad=99*ad/100; Setac(box1,(wchar_t)'e',ad); if (ad>=100) return 'e'; break; } // --- test E --------------------------------------------------- for(ad=d=100;dx>2 && dy>4 ;){ // min 3x4 // rewritten for vectors 0.43 int i1, i2, i3, i4, i5; // line derivation + corners DBG( wchar_t c_ask='E'; ) // if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80 /* half distance to the center */ d=2*sq(128/4); /* now we check for the upper right end of the h */ if (aa[3][2]>d/2) Break; /* [2] = distance, ~dj... */ if (aa[0][2]>d/2) Break; /* upper left end */ if (aa[1][2]>d/2) Break; /* lower left end */ if (aa[2][2]>d/2) Break; /* lowerright end */ /* E f near E OOOOOOOO OOOO O5 O O O4 O OOOO3 OOOOOO O2 O O O O1 O O OOOOOOOO OOOOOO */ // check the bow from below for (i=aa[1][3];i!=aa[2][3];i=(i+1)%box1->num_frame_vectors[0]) { if (y1-box1->frame_vector[ i][1]>dy/4) break; // fatal! 
} if (i!=aa[2][3]) Break; // ~AHKMNRX // search most left+down between bottom right and top right i1=nearest_frame_vector(box1, aa[2][3],aa[3][3], x0, y1); i5=nearest_frame_vector(box1, i1,aa[3][3], x0, y0); i3=nearest_frame_vector(box1, i1, i5, x1, (y0+y1)/2); i2=nearest_frame_vector(box1, i1, i3, x0, (2*y0+y1)/3); i4=nearest_frame_vector(box1, i3, i5, x0, (y0+2*y1)/3); i =nearest_frame_vector(box1, aa[0][3],aa[1][3], x0-dx/4, (y0+y1)/2); if (2*box1->frame_vector[i][0] < aa[0][0]+aa[1][0]-1-dx/16) Break; if (2*box1->frame_vector[i][0] < aa[0][0]+aa[1][0]) ad=99*ad/100; // f MSG(fprintf(stderr,"i1-5 %d %d %d %d %d",i1,i2,i3,i4,i5);) // holes right open? for( i=1,y=y0; yp,cs,2) == 0 ) i=0; if( i ) Break; for( i=1,y=y1; y>y1-dy/4 && i; y-- ) // long black line if( get_bw(x0+dx/6,x1-dx/4,y,y,box1->p,cs,2) == 0 ) i=0; if( i ) Break; for( i=1,y=y0+dy/3; yp,x0 ,y,dx,cs,0,RI); j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>dx/3 ) i=0; } if( i ) Break; x=x1-dx/3; y=y0; // von oben durchbohren! turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y0+dy/4 ) Break; turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,DO); if( y>y0+dy/3 ) Break; turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,DO); if( x<=x1 || y>y0+dy/2 ) Break; x=x1-dx/3; y=y1; // von unten durchbohren! 
turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,UP,ST); if( yp,&x,&y,x0,x1,y0,y1,cs,ST,UP); if( yp,&x,&y,x0,x1,y0,y1,cs,RI,UP); if( x<=x1 || yp,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y0+dy/4 ) Break; turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,DO); if( y>y0+dy/3 ) Break; y+=dy/15; turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST); if( x15 && x==x0) ad=99*ad/100; // to thin x+=dx/15+1; turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y1-dy/3 ) Break; // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) > 0 ) Break; // if (sdata->holes.num > 0) Break; i=loop(box1->p,x0,y0+dy/4,dx,cs,0,RI); if(i>dx/2) Break; j=loop(box1->p,x0,y0+dy/2,dx,cs,0,RI); if(ji+dx/8) Break; i=j; j=loop(box1->p,x0,y1-dy/4,dx,cs,0,RI); if(ji+dx/8) Break; j=loop(box1->p,x1,y1-dy/4,dx,cs,0,LE); for( x=dx,y=y0+dy/6; yp,x0,y,dx,cs,0,RI); if (i>j/2 && ad>98) ad=99*ad/100; if (i>dx/4) break; if(i3*dx) // ~[ if( get_bw(x0+dx/2,x0+dx/2,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 ) Break; if (box1->m2) { if (!hchar) ad=ad*99/100; if ( gchar) ad=ad*99/100; } Setac(box1,(wchar_t)'E',ad); if (ad>=100) return 'E'; break; } return box1->c; } static wchar_t ocr0_n(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; int i,j,d,x,y,i1,i2,i3,handwritten=0, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test n --------------------------------------------------- // glued rm is very similar to glued nn -> thickness of h-line should grow // may02: tested for 8x12 font for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='n'; ) // if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80 i= num_cross( 0,dx-1,dy/4,dy/4,sdata->bp,cs); j= num_cross( 0,dx-1,dy/2,dy/2,sdata->bp,cs); if( (i<2 || i>3) && j!=2 ) Break; if( loop(sdata->bp,dx/2,0,dy,cs,0,DO) > dy/8 && sdata->hchar ) Break; /* tt */ y=5*dy/8; /* also for handwritten n, where first bow goes not down enough */ if( num_cross( 0,dx/2,y ,y 
,sdata->bp,cs) != 1 && num_cross( 0,dx/2,y-1,y-1,sdata->bp,cs) != 1 && num_cross(dx/2,dx-1,y ,y ,sdata->bp,cs) < 1 ) Break; // n rr // ~thick_w y=loop(sdata->bp,dx-1-dx/4,0,dy,cs,0,DO); if(y>dy/2) Break; if(y>1)if( get_bw(dx-1-dx/4,dx-1,0,y-2,sdata->bp,cs,1) == 1 ) Break; y=3*dy/4; if( num_cross(0, dx/2,y ,y ,sdata->bp,cs) == 1 && num_cross(dx/2,dx-1,y ,y ,sdata->bp,cs) == 0 ) Break; // ~p y=dy/2; if( num_cross(0,dx-1,dy/2-dy/8,dy/2-dy/8,sdata->bp,cs) == 2 && num_cross(0,dx-1,dy/2, dy/2 ,sdata->bp,cs) == 2 ) { // n rr /* printed n */ x =loop(sdata->bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // search 1st v-line x+=loop(sdata->bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // 1st gap x+=loop(sdata->bp,x,y,dx-x,cs,0,RI); if(x< dx/2) Break; i2=x; // 2nd v-line x+=loop(sdata->bp,x,y,dx-x,cs,1,RI); if(x<3*dx/4) Break; i3=x; // 2nd gap i=dy/4; y=13*dy/16; if( num_cross(dx/2,dx-1,y,y,sdata->bp,cs)==2 ) i=3*dy/8; // \it n if (i<2 && il1 l2 l3 l4 ??? for(x=i1;xbp,x, 0,dy,cs,0,DO)>=i ) break; if(x bp,x,dy-1,dy,cs,0,UP) >dy/4 ) break; if(x==i2) Break; // no gap detected (glued serifs ??? ) // glued rm as nn ??? 
for(y=0,x=(i1+i2)/2;xbp,x,0,dy,cs,0,DO); i=loop(sdata->bp,x,i,dy,cs,1,DO); // measure thickness if( i>y ) y=i; if( i7 ) if( loop(sdata->bp,dx-1,dy-1-dy/8,dx,cs,0,LE) +loop(sdata->bp, 0,dy-1-dy/8,dx,cs,0,RI)-dx/8-1 > loop(sdata->bp,dx-1,dy-1-dy/2,dx,cs,0,LE) +loop(sdata->bp, 0,dy-1-dy/2,dx,cs,0,RI) ) ad=90*ad/100; // broken o if( dy>7 && dx>7 ) if( loop(sdata->bp,dx-1, dy/2,dx,cs,0,LE)==0 && loop(sdata->bp,dx-1,dy-1-dy/8,dx,cs,0,RI)>dx/8 ) ad=98*ad/100; // broken o } else { /* check handwritten n */ if( num_cross(0,dx-1,dy/2, dy/2 ,sdata->bp,cs) != 3 && num_cross(0,dx-1,dy/2-dy/8,dy/2-dy/8,sdata->bp,cs) != 3 ) Break; i =loop(sdata->bp,0,dy/2-dy/8,dx,cs,0,RI); if (i>dx/4) Break; i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,1,RI); if (i>dx/2) Break; i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,0,RI); if( num_cross(i,i, 0,dy/2-2*dy/8,sdata->bp,cs) != 0 ) Break; i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,1,RI); if( num_cross(i,i,dy/2+1, dy-1,sdata->bp,cs) != 0 ) Break; handwritten=80; } i= loop(sdata->bp,dx-1 ,dy/2,dx,cs,0,LE); if(i>5) if( get_bw(dx-1-i/2,dx-1-i/2,0,dy/2,sdata->bp,cs,1) == 1 ) Break; // ~rr i+=loop(sdata->bp,dx-1-i,dy/2,dx,cs,1,LE); if( get_bw(dx-1-i ,dx-1-i ,0,dy/2,sdata->bp,cs,1) == 0 ) Break; // ~rv if( get_bw(dx/2,dx/2,dy/4,dy/4,sdata->bp,cs,1) == 0 && get_bw(dx/2,dx-1,dy-2,dy-2,sdata->bp,cs,1) == 0 && get_bw(dx/2,dx/2,dy/4,dy-2,sdata->bp,cs,1) == 1 ) Break; // ~P // glued ri ??? 
if( box1->dots>0 && box1->m1 ) if( get_bw((x1+x0)/2,x1,box1->m1,y0-1,box1->p,cs,1) == 1 ) if( num_cross( 0,dx-1,0 ,0 ,sdata->bp,cs) >2 || num_cross( 0,dx-1,1 ,1 ,sdata->bp,cs) >2 ) Break; i=loop(sdata->bp,dx-1, dy-1,dx,cs,0,LE); if (i>dx/2) i=loop(sdata->bp,dx-1, dy-2,dx,cs,0,LE); x=loop(sdata->bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if (sdata->hchar && i-x>1) Break; // ß x=loop(sdata->bp, 0,dy-1,dx,cs,0,LE); // check for serifs i=loop(sdata->bp, 0,dy-2,dx,cs,0,LE); if (ibp, 0, 1,dx,cs,0,LE); if (ibp, 0, 2,dx,cs,0,LE); if (ihchar && x>0) Break; // fl if (num_cross( 0,dx-1,dy/4,dy/4,sdata->bp,cs)>=3) ad=98*ad/100; // small M if (sdata->hchar || 2*y0m1+box1->m2) ad=96*ad/100; if (sdata->gchar) ad=96*ad/100; // ß fl if (dx<5) { // for small fonts no middle line is possible for m ad=99*ad/100; // 4x6 m if (num_cross(0,dx-1,dy/8,dy/8,sdata->bp,cs)>=2) { ad=97*ad/100; // ~m if (dy<=4) Setac(box1,'m',97); // only for 4x6 font! } } Setac(box1,'n',ad); break; } return box1->c; } static wchar_t ocr0_M(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int d,x,y,i0,i1,i2,i3,i4,i5,i6,i7,t1, hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ (*aa)[4]=sdata->aa, /* corner-points, (x,y,dist^2,vector_idx) */ ad; /* tmp-vars */ // ------------------ test M --------------------------- for(ad=d=100;dx>3 && dy>3;){ // dy<=dx nicht perfekt! 
besser mittleres // min-suchen fuer m DBG( wchar_t c_ask='M'; ) // if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80 d=2*sq(128/4); /* half distance to the center, added 2018-09 */ if (aa[3][2]>d/2) Break; /* [2] = distance, ~dj..., upper right */ if (aa[0][2]>d/2) Break; /* upper left end */ if (aa[1][2]>d/2) Break; /* lower left end */ if (aa[2][2]>d/2) Break; /* lowerright end */ // search 3 legs and 2 space between, [][3]=vector_index i1=nearest_frame_vector(box1, aa[0][3],aa[2][3], x0, y1); // leg1 i3=nearest_frame_vector(box1, i1,aa[3][3], x1, y1); // leg3 i4=nearest_frame_vector(box1, i1, i3, x0, y0); // gap1 i5=nearest_frame_vector(box1, i1, i3, x1, y0); // gap2 i2=nearest_frame_vector(box1, i4, i5, (x0+x1)/2, y1); // leg2 MSG(fprintf(stderr,"i1-5 %d %d %d %d %d",i1,i2,i3,i4,i5);) // 2018-09 ToDo: check essentials of 3 legs if (box1->frame_vector[i1][0] > x0+dx/4) Break; // leg1 not leftmost if (box1->frame_vector[i4][1] > y0+dy/2) Break; // gap1 too low if (box1->frame_vector[i5][1] > y0+dy/2) Break; // gap2 too low if (box1->frame_vector[i5][0] < x0+dx/2) Break; // gap2 too left if (box1->frame_vector[i5][0] < box1->frame_vector[i2][0]) Break; // gap2 more left than leg2 // check right side molten 'nt' of tmp13/sslmozFP.png i6=nearest_frame_vector(box1, i3, aa[3][3], x1+dx/2, y0+dy/8); i7=nearest_frame_vector(box1, i3, i6, x0, y0+dy/2); if (box1->frame_vector[i7][0]frame_vector[i6][0]-dx/8 && hchar) Break; // ad=97*ad/100; MSG(fprintf(stderr,"i1-7 %d %d %d %d %d %d %d ad=%d",\ i1,i2,i3,i4,i5,i6,i7,ad);) for (y=dy/4;y<=3*dy/4;y++) if (num_cross(0,dx-1,y,y,bp,cs)>=3) break; if (y>3*dy/4 && dx>4) Break; if( num_cross(0,dx-1, dy/4, dy/4,bp,cs)<2 && num_cross(0,dx-1, dy/8, dy/8,bp,cs)<2 ) Break; /* fat M */ if( num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<2 ) Break; x = loop(bp,dx-1 ,dy-1,dx,cs,0,LE); // ~ melted kl x = loop(bp,dx-1-x,dy-1,dx,cs,1,LE); if( x>dx/2 ) Break; if( 
loop(bp, 0,7*dy/16,dx,cs,0,RI) + loop(bp,dx-1,7*dy/16,dx,cs,0,LE) > dx/2 ) Break; // ~K if (loop(bp, 0,dy-1, dx,cs,0,RI)>dy/4) Break; // ~V 2010-10 if (loop(bp, 0,dy-1, dx,cs,0,RI)>dy/8) { ad=ad*99/100;MSG({}) } // ~V 2010-10 if( dy>8 /* following lines should be extend to range check */ && loop(bp, dx/4,dy-1, dy,cs,0,UP) 2 && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)> 2 ) Break; // ~it_u if( num_cross(0 ,dx-1,3*dy/4,3*dy/4,bp,cs)==2 && num_cross(dx/2,dx/2,3*dy/4, dy-1,bp,cs)> 0 ) Break; // ~it_v if( loop(bp,3*dx/4, 0,dy,cs,0,DO) > loop(bp,2*dx/4, 0,dy,cs,0,DO) && loop(bp,3*dx/4,dy-1,dy,cs,0,UP) < loop(bp,2*dx/4,dy-1,dy,cs,0,UP) ) Break; // ~N if( loop(bp,3*dx/4, dy/8,dy,cs,0,DO) > loop(bp,2*dx/4, dy/8,dy,cs,0,DO) && loop(bp,3*dx/4,dy-1-dy/8,dy,cs,0,UP) < loop(bp,2*dx/4,dy-1-dy/8,dy,cs,0,UP) ) Break; // ~serif_N // i0 is lower end of upper serifen (widest gap? ) i0=0; if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)!=4 ){ // Is it a N ? if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)==3 ){ for(y=dy/2+1;yy-2 ) Break; // ~N } } } // MNWK for(i2=0,i1=x=dx/2;xi2) {i2=y;i1=x;} else break; } i3=i2+loop(bp,i1,i2,dy-i2,cs,1,DO); if(i2hchar) Break; // rm ad=99*ad/100; } if (i2==0 && dx>8 && dy>12) Break; // glued and bad splitted serifen-MN // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) != 0 ) Break; // small A //if (sdata->holes.num != 0) Break; // includes tiny holes 1810.rnd80 if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80 t1=loop(bp,0 ,3*dy/4,dx,cs,0,RI); t1=loop(bp,t1,3*dy/4,dx,cs,1,RI); // thickness of line? 
if( 7*(t1+1)=i2 ) Break; // no good M i1+=loop(bp,i1, dy/4,dx,cs,1,RI); i2+=loop(bp,i2,3*dy/4,dx,cs,1,RI); if( i1>=i2 ) Break; // no good M i1+=loop(bp,i1, dy/4,dx,cs,0,RI); i2+=loop(bp,i2,3*dy/4,dx,cs,0,RI); if( i1<=i2 ) Break; // no good M } if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)==2 && num_cross(0,dx-1,dy/4,dy/4,bp,cs)==2 && !hchar ) Break; // ~ \it u if (dy<17) if( num_cross(0,dx-1, 0, 0,bp,cs)<2 ) ad=99*ad/100; if (dx>5) /* 4x6 font has only 1 cross at y=1 */ if( num_cross(0,dx-1, 1, 1,bp,cs)<2 ) ad=96*ad/100; // kt if( num_cross(dx/2,dx/2, 0, dy-1,bp,cs)!=1) ad=98*ad/100; // kt if (dx<5 && loop(bp,dx/2,0,dy,cs,0,DO)>=3*dy/8) ad=96*ad/100; // 4x6 H if( num_cross(0,dx-1, dy/4, dy/4,bp,cs)<=2 && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<=2 && dx>8 && dy>12 ){ ad=98*ad/100; for(y=5*dy/16;y<5*dy/8;y++) // look for H-line if( num_cross(0,dx-1,y ,y ,bp,cs)==1 ) break; if( y<5*dy/8 ) ad=95*ad/100; if( y<5*dy/8 ) if( num_cross(2+dx/6,dx-3-dx/6,y-2,y-2,bp,cs)==0 || num_cross(2+dx/6,dx-3-dx/6,y-1,y-1,bp,cs)==0 ) Break; // ~H bad! 
} if( loop(bp,3*dx/8, 0,dy,cs,0,DO) >dy/2 && loop(bp,5*dx/8,dy-1,dy,cs,0,UP) >dy/2 ) ad=95*ad/100; if(!hchar){ ad=98*ad/100; /* not sure */ if( loop(bp,0, dy/4,dx,cs,0,RI) < loop(bp,0,dy-1-dy/8,dx,cs,0,RI)-dx/16 ) Break; // ~wi glued } if( gchar ) ad=98*ad/100; if (ad>99 && dx<8) ad=99*ad/100; /* give 5x8 N a chance */ Setac(box1,'M',ad); break; } return box1->c; } static wchar_t ocr0_N(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; int i,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1; int dx=x1-x0+1,dy=y1-y0+1, /* size */ (*aa)[4]=sdata->aa, /* corner-points, (x,y,dist^2,vector_idx) */ dbg[9], ad; /* tmp-vars */ // --- test N ------- +hchar -gchar for(ad=d=100;dx>3 && dy>3;){ // 4x6font int j; DBG( wchar_t c_ask='N'; ) //if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ //if (sdata->holes.num > 0) ad=98*ad/100; /* # */ if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80 if (dx<6) ad=99*ad/100; if (dx<5) ad=99*ad/100; /* half distance to the center */ d=2*sq(128/4); /* ToDo17 bug? add doc, [][2]=dist? 
square() */ /* now we check for the 4 ends of the x */ if (aa[0][2]>d) Break; if (aa[1][2]>d) Break; if (aa[2][2]>d) Break; if (aa[3][2]>d) Break; if (aa[3][0]-aa[0][0]x0+dx/8) Break; // 2010-10-11 if (abs(aa[3][1]-aa[0][1])>(dy+2)/5) Break; /* glued tu */ if (abs(aa[3][1]-aa[0][1])>(dy+4)/8) ad=98*ad/100; /* glued tu */ i= nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y0+dy/2); x=box1->frame_vector[i][0]; if (x<=x0+dx/2 || x2*sq(1024/4)) Break; ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100; d=line_deviation(box1, aa[2][3], aa[3][3]); if (d>2*sq(1024/4)) Break; /* i1: uppermost left ^ from bottom (near 0,0) */ i1=nearest_frame_vector(box1,aa[1][3],aa[2][3], x0+dx/8, y0); x=box1->frame_vector[i1][0]; y=box1->frame_vector[i1][1]; MSG( fprintf(stderr,"i1= %d (%d,%d) left ^ from below", i1,x-x0,y-y0);) if (y-y0 > 5*dy/8) Break; if (x-x0 > 5*dx/8) Break; /* i3: uppermost right ^ ~H */ i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y0); MSG( fprintf(stderr,"i3= %d (%d,%d) right ^ (ad=%d)",\ i3, box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0,ad);) /* check lower border of diagonal line, may fail on fonts where * line ends on middle of right vertical line (screen font) */ dbg[0]=d=line_deviation(box1,i1, aa[2][3]); /* check right border of left vertical line */ /* but split to possible lower left serif + vert. line */ j=nearest_frame_vector(box1,aa[1][3],i1, x0+dx/2, y1+dy/2); dbg[1]=d=line_deviation(box1, aa[1][3],j ) +line_deviation(box1, j,i1); MSG(fprintf(stderr," i1-a2 %d a1_serif-i1 %d ad=%d",dbg[0],dbg[1],ad);) if (dbg[0] > sq(1024/4)) Break; if (dx>4 && dbg[1] > sq(1024/4)) ad=97*ad/100; // d=0..2*sq(1024) if (dx>4 && dbg[1] > sq(1024/3)) Break; // d=0..2*sq(1024) // serif N has d=sq(1024/3)=116508 MSG( fprintf(stderr,"ad %d", ad); ) /* i2: lowest right v from top, same frame? N-tilde etc.? 
*/ i2=nearest_frame_vector(box1,aa[3][3],aa[0][3], x1, y1-dy/8); x=box1->frame_vector[i2][0]; y=box1->frame_vector[i2][1]; MSG( fprintf(stderr,"i2= %d (%d,%d) lowest right v from top",\ i2, box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0);) if (y-y0 < 3*dy/8) Break; if (x-x0 < 3*dx/8) Break; // test H if ( box1->frame_vector[i3][0]-box1->frame_vector[i1][0]> dx/4 && box1->frame_vector[i3][1]-box1->frame_vector[i1][1]<=dy/8 && y<=box1->frame_vector[i1][1]) Break; /* check if upper left and lower right point are joined directly */ /* but split to possible upper right serif + down line */ j=nearest_frame_vector(box1,i2,aa[0][3], x0+dx/2, y0-dy/2); dbg[2]=d=line_deviation(box1,i2, j) +line_deviation(box1, j, aa[0][3]); /* check if upper right and lower right point are joined directly */ /* but split to possible upper right serif + vert. line */ j=nearest_frame_vector(box1,aa[3][3],i2, x0+dx/2, y0-dy/2); dbg[3]=d=line_deviation(box1, aa[3][3],j) +line_deviation(box1, j,i2); // ToDo: split once more? MSG( fprintf(stderr," i2-a0 %d a3-i2 %d ad %d",dbg[2],dbg[3], ad); ) if (dbg[2] > sq(1024/4)) Break; // serif N, ToDo: do it better if (dbg[3] > sq(1024/4)) ad=97*ad/100; if (dbg[3] > sq(1024/3)) Break; MSG( fprintf(stderr,"ad %d", ad); ) MSG( fprintf(stderr,"check against melted tu"); ) // i1 = left ^ from below, i2 = lowest right v from top // sample gocr_Device*: 3-8,dy=27 if ( (box1->frame_vector[i1][1]-y0) -(y1-box1->frame_vector[i2][1])>dy/8) ad=99*ad/100; /* ~ tu */ MSG( fprintf(stderr,"tu ad %d", ad); ) if (box1->frame_vector[i2][0] -box1->frame_vector[i1][0]<=dx/8) Break; /* nonsignificant distance */ MSG( fprintf(stderr,"i2-i1<=dx/8 ad %d", ad); ) /* i1: uppermost left ^ from bottom (near 0,0) */ /* i2: lowest right v from top, same frame? N-tilde etc.? 
*/ if (box1->frame_vector[i2][1] -box1->frame_vector[i1][1]<=dy/8) { // may happen on screen fonts 7x10 if (dx>8) ad=97*ad/100; /* too flat (ff,H) */ } MSG( fprintf(stderr,"i2-i1<=dy/8 ad %d", ad); ) if (box1->frame_vector[i2][1] -box1->frame_vector[i1][1]<=dy/2) ad=99*ad/100; MSG( \ fprintf(stderr,"^v %d %d %d %d line dev %d %d %d %d max %d %d ad %d",\ box1->frame_vector[i1][0]-x0,box1->frame_vector[i1][1]-y0,\ box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0,\ dbg[0],dbg[1],dbg[2],dbg[3],sq(1024/4),sq(1024),ad);) ad=(100-(dbg[0]-sq(1024)/2)/sq(1024)/4)*ad/100; MSG( fprintf(stderr,"ad %d", ad); ) ad=(100-(dbg[1]-sq(1024)/2)/sq(1024)/4)*ad/100; MSG( fprintf(stderr,"ad %d", ad); ) ad=(100-(dbg[2]-sq(1024)/2)/sq(1024)/4)*ad/100; MSG( fprintf(stderr,"ad %d", ad); ) ad=(100-(dbg[3]-sq(1024)/2)/sq(1024)/4)*ad/100; MSG( fprintf(stderr,"ad %d", ad); ) if (!hchar) ad=99*ad/100; if ( gchar) ad=98*ad/100; // \sc N Setac(box1,'N',ad); break; } return box1->c; } static wchar_t ocr0_h(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */ // --- test h --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 // rewritten for vectors 0.42 int i1, i2, i3, i4, i5, i6, i7, i8; // line derivation + corners DBG( wchar_t c_ask='h'; ) //if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 1) ad=97*ad/100; // excludes tiny holes 1810.rnd80 if (box1->num_frames > 2) Break; // excludes tiny holes 1810.rnd80 /* half distance to the center */ d=2*sq(128/4); /* now we check for the upper right end of the h */ if (aa[3][2]d/2) Break; /* upper left end */ if (aa[1][2]>d/2) Break; /* lower left end */ if (aa[2][2]>d/2) Break; /* lowerright end */ /* type A 
B=italic ??? 18 OOO O O O O O O7OOO OOOO O4 O O O O O O O O O O O O 2O3 5O6 O OOO */ i1=i8=aa[0][3]; i2=i3=aa[1][3]; i5=i6=aa[2][3]; // check the bow from below (fails on melted serifs) for (i4=i=i2;i!=i5;i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[ i][1] frame_vector[i4][1]) i4=i; // get next maximum if (box1->frame_vector[ i][1]<=y0) break; // fatal! } if (box1->frame_vector[i4][1]-y0frame_vector[i4][1]18 ) if( get_bw(dx-1-dx/3,dx-1,dy/6 ,dy/5 ,bp,cs,1) == 1 ) Break; if( get_bw(dx-1-dx/3,dx-1,dy-1-dy/4,dy-1 ,bp,cs,1) == 0 ) Break; // s- for( x=x0+dx/3;xp,cs,1) == 0 ) break; if( x>=x1-dx/3 ) Break; for(i=dy/4,y=y0+dy/3;y<=y1 && i;y++){ if( num_cross(x0,x1 ,y,y, box1->p,cs) == 2 ) i--; } if( i ) Break; for(i=dy/4,y=y0;y<=y0+dy/2 && i;y++){ if( num_cross(x0,x0+dx/2,y,y, box1->p,cs) == 1 ) i--; } if( i ) Break; // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) > 0 ) // could happen // if (sdata->holes.num > 0) if (box1->num_frames > 1) // excludes tiny holes 1810.rnd80 if (sdata->holes.hole[0].y0 > dy/3 && sdata->holes.hole[0].y1 < dy-1-dy/3) Break; // if( num_hole(x0, x1, y0+dy/3 , y1-dy/3 ,box1->p,cs,NULL) != 1 ) Break; // mini if( loop(bp,dx-1,dy/3,dx,cs,0,LE)+dx/8 < loop(bp,dx-1,dy/2,dx,cs,0,LE) && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)+dx/8 < loop(bp,dx-1,dy/2,dx,cs,0,LE)) Break; // ~k Okt00 i=loop(bp,0,dy-1-dy/4,dx,cs,0,RI); if (i>1 && num_cross(x0,x0,y0+dy/8+2,y0+dy/2, box1->p,cs) == 1 ){ // fi fu ad=(99-(1<p,cs) == 0 ) ad=97*ad/100; if (num_cross(x0+dx/2,x0+dx/2,y0,y0+dy/8+2, box1->p,cs) == 1 ) ad=97*ad/100; if (ad<1) break; } i =loop(bp,0,dy/4,dx,cs,0,RI); i+=loop(bp,i,dy/4,dx,cs,1,RI)+1; for ( ; i5*dy/8 ) { ad=98*ad/100; // melted hi, li, but handwritten h MSG(fprintf(stderr,"ad=%d",ad);) } if( num_cross(x0,x0,y0+(dy+3)/8,y1,box1->p,cs) > 1 ) { ad=98*ad/100; // melted fr MSG(fprintf(stderr,"ad=%d",ad);) } i=loop(bp,dx-1,3*dy/4,dx,cs,0,LE); // melted "fr" for vertikal letters if (i>dx/4 && loop(bp,dx-1-i,dy-1,dy,cs,1,UP)>dy/2) { ad=94*ad/100; 
MSG(fprintf(stderr,"ad=%d",ad);) } i=loop(bp,dx-1,1+dy/16,dx,cs,0,LE); if (i 0 ) { ad=95*ad/100; // melted fi MSG(fprintf(stderr,"ad=%d",ad);) } if (loop(box1->p,x1,y0+1+dy/16,dx,cs,0,LE)p,x1,y0 ,dx,cs,0,LE)p,x1,y0+1,dx,cs,0,LE)holes.num > 0) ad=97*ad/100; if (box1->m2) { if ( gchar) ad=98*ad/100; if (!hchar) ad=97*ad/100; } else ad=99*ad/100; Setac(box1,'h',ad); break; } return box1->c; } static wchar_t ocr0_H(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,j1,d,x,y,ya,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test H --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='H'; ) // if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 1) ad=97*ad/100; // excludes tiny holes 1810.rnd80 if (box1->num_frames > 3) break; // excludes tiny holes 1810.rnd80 if ( num_cross(0,dx-1,dy/4 ,dy/4 ,bp,cs) != 2 && num_cross(0,dx-1,dy/4-1,dy/4-1,bp,cs) != 2 ) Break; if ( num_cross(0,dx-1,3*dy/4 ,3*dy/4 ,bp,cs) != 2 && num_cross(0,dx-1,3*dy/4+1,3*dy/4+1,bp,cs) != 2 ) Break; if ( loop(bp,0 ,dy/8,dx,cs,0,RI) + loop(bp,dx-1,dy/8,dx,cs,0,LE)>dx/2 ) Break; // ~A for ( j1=0,i=1,y=y0+dy/10; yp,x0 ,y,dx,cs,0,RI) +loop(box1->p,x1 ,y,dx,cs,0,LE); if( j>dx/2 ) i=0; if(j>j1)j1=j; } if ( !i ) Break; for ( i=1,y=dy/4; ydx/5 ) i=0; } if (!i) Break; // ~K Jul00 for (i=0,ya=y=y0+dy/3; yp,x0 ,y,dx,cs,0,RI); j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ) { i=j; ya=y; } } if (i<=dx/2) Break; ya-=y0; if (num_cross(0,dx-1,ya ,ya ,bp,cs) != 1 && num_cross(0,dx-1,ya+1,ya+1,bp,cs) != 1 ) Break; /* Dec00 */ for (y=ya; y 2 && num_cross(0,dx-1,y+1,y+1,bp,cs) > 2 ) break; if (yp,cs,1) == 0 ) i=0; } if (i) Break; for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){ if (get_bw( x, x,y1-dy/4,y1 ,box1->p,cs,1) == 0 ) i=0; } if (i) Break; for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){ 
if (num_cross(x,x,y0+dy/8,y1-dy/8, box1->p,cs) == 1 ) i=0; } if (i) Break; for (i=1,y=y0;y<=y0+dy/4 && i;y++){ if (num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; } if (i) Break; for(i=1,y=y1-dy/4;y<=y1 && i;y++){ if (num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; } if (i) Break; if (get_bw(x1-dx/8, x1 , y0, y0+dy/8,box1->p,cs,1) != 1 ) Break; if (get_bw(x0 , x0+dx/8, y1-dy/8, y1,box1->p,cs,1) != 1 ) Break; i1=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(i1>dx/2) Break; i2=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(i2i1+dx/8) Break; i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if(i3i2+dx/8) Break; if (abs(i1+i3-2*i2)>dx/16+1) Break; // test for thick tall N looking like a H if (num_cross(x0,x1,y0, y1 , box1->p,cs) < 2 ) Break; // sure N if (num_cross(x0,x1,y0,(y0+y1)/2, box1->p,cs) < 2 ) Break; // sure N i1=loop(bp, 0, dy/4,dx,cs,0,RI); i1=loop(bp, i1, dy/4,dx,cs,1,RI); // right side of left vert. line i2=loop(bp, 0,dy-1-dy/4,dx,cs,0,RI); i2=loop(bp, i2,dy-1-dy/4,dx,cs,1,RI); // right side of left vert. line i3=loop(bp,dx-1 ,dy-1-dy/4,dx,cs,0,LE); i3=loop(bp,dx-1-i3,dy-1-dy/4,dx,cs,1,LE); // left side of right vert. line if (dx<10 && i1-i2>dx/4) Break; if (dx<10 && i1-i2>dx/8) ad=99*ad/100; // 7x10 ~N i =loop(bp, 0,dy/2+1+dy/8,dx,cs,0,RI); i+=loop(bp, i,dy/2+1+dy/8,dx,cs,1,RI); i =loop(bp, i,dy/2+1+dy/8,dx,cs,0,RI); if (i6*i2 && 5*i3>6*i2 && i1>i2 && i3>i2) Break; // ??? 
if (dx>8) if (loop(bp,dx-1, 3*dy/8,dx,cs,0,LE) -loop(bp,dx-1, dy/8,dx,cs,0,LE)>dx/4 && loop(bp,dx-1, 3*dy/8,dx,cs,0,LE) -loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)>dx/4 ) Break; // ~K // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) Break; // if (sdata->holes.num != 0) Break; if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80 MSG( fprintf(stderr,"i123 %d %d %d",i1,i2,i3); ) if ( gchar) ad=99*ad/100; if (!hchar) ad=98*ad/100; Setac(box1,'H',ad); break; } return box1->c; } static wchar_t ocr0_k(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */ // --- test k --------------------------------------------------- for(ad=100;dx>2 && dy>3;){ // min 3x4 // rewritten for vectors 0.43 int d, i1, i2, i3, i4, i5, i6, i7, i8; // line derivation + corners DBG( wchar_t c_ask='k'; ) // if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80 /* half distance to the center */ d=2*sq(128/4); /* now we check for the upper right end of the h */ if (aa[3][2]d/2) Break; /* upper left end */ if (aa[1][2]>d/2) Break; /* lower left end */ if (aa[2][2]>d/2) Break; /* lowerright end */ /* type A B=italic ??? 18 OOO O O O O O6 O O7 OO O OO O4OO OO OO O OO O O O OO O O O 2O3 O5 O OOO */ i1=i8=aa[0][3]; i2=i3=aa[1][3]; i5= aa[2][3]; // check the bow from below for (i4=i=i2;i!=i5;i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[ i][1] frame_vector[i4][1]) i4=i; // get next maximum if (box1->frame_vector[ i][1]<=y0) break; // fatal! 
} if (box1->frame_vector[i4][1]-y0frame_vector[i4][1]frame_vector[i][0]frame_vector[i][0]frame_vector[i][0]dx/2) Break; i3=loop(bp,0,dy/2+dy/4,dx,cs,0,RI); if (abs(i1+i3-2*i2)>(dx+8)/16+1 || i1p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2,x1, y1-dy/3,y1 ,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/4,x1, y0 ,y0+3*dy/16,box1->p,cs,1) == 1 ) Break; if( get_bw(x1-dx/4,x1, y0+dy/4,y1-dy/4,box1->p,cs,1) != 1 ) Break; //~1 if( get_bw(x1-dx/4,x1, y1-dy/8,y1 ,box1->p,cs,1) != 1 ) Break; if (sdata->holes.num > 0) if (sdata->holes.hole[0].y0 > dy/4) Break; // if( num_hole(x0,x1,y0+dy/4,y1,box1->p,cs,NULL) != 0 ) Break; for(y=y0+1;yp,cs,1) == 0 ) break; if( yp,cs,100)>50) i=0; if( i ) Break; // no vertikal line! /* check for falling line in the lower right corner */ for (j=x=0,y=5*dy/8;y<7*dy/8;y++) { i= loop(bp,dx-1,y,dx,cs,0,LE); if(i>x) { x=i;j=y; } } // x=dx/6 on fat k if (x + loop(bp,dx-1-x,j,dx,cs,1,LE)/2 dx/2) i =loop(bp,dx-1,dy-2,dx,cs,0,LE); if(i>dx/2) Break; i+=loop(bp,dx-1-i,dy-1,dx,cs,1,LE)/2; if( get_line(x,y,dx-1-i,dy-1,bp,cs,100)<60 ) Break; for(y=y0+dy/3;yp,cs)==2 ) break; if( y==y1 ) Break; if( // num_hole(x0,x1 ,y0 ,y1 ,box1->p,cs,NULL)>0 // ~A happens! 
sdata->holes.num > 0 ) if (sdata->holes.hole[0].x1>dx-1-dx/4 || sdata->holes.hole[0].y1>dy-1-dy/4 || sdata->holes.hole[0].y0< dy/4) Break; // if ( num_hole(x0,x1-dx/4,y0+dy/4,y1-dy/4,box1->p,cs,NULL)==0 ) Break; i=loop(bp,0,dy-1,dx,cs,0,RI); i=loop(bp,i,dy-1,dx,cs,1,RI); if (dx>8 && 4*i>3*dx) Break; // ~glued_tz i =loop(bp,0,dy/4,dx,cs,0,RI); if (i>dx/4 && i+loop(bp,i,dy/4,dx,cs,1,RI)>dx/2 && loop(bp, 0,0,dx,cs,0,RI)<=dx/4 && loop(bp,dx-1,0,dx,cs,0,LE)>=dx/2 ) ad=90*ad/100; // divided Q if( 2*y0>(box1->m1+box1->m2) ) { ad=99*ad/100; MSG({}) } if ( gchar) ad=99*ad/100; if (!hchar) ad=99*ad/100; Setac(box1,'k',ad); break; } return box1->c; } static wchar_t ocr0_K(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,i1,i2,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad,ya,xa,yb,xb,yc,xc,yd,xd,ye,xe,yf,xf; /* tmp-vars */ // --- test K --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // Mar2000 perfect??? 
no ocr-a X Jul09 DBG( wchar_t c_ask='K'; ) // if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80 for(y=dy/8;yp,x,y0,y1-y0,cs,0,DO); if (y>3*dy/4) { i=1;break; } if (dy>15 && j>dy/8){ j =loop(box1->p,x-1,y0+y-1,x1-x0,cs,0,LE)/2; y+=loop(box1->p,x-j,y0+y-1,y1-y0,cs,0,DO)-1; } if(y>=dy/4) i=0; /* ok, found gap */ } if( i ) Break; for(y=0,x=x0+dx/4;x<=x1-dx/4;x++){ // lower h-gap i=loop(box1->p,x,y1,dy,cs,0,UP); /* on small chars bypass possible low left serifs */ if (i>0) { i2=loop(box1->p,x-1,y1-i-1,dy,cs,0,UP); if (i2>1) i+=i2-1; } if (i>y) { y=i; i1=x; } } if( y<=dy/8 ) Break; if (yp,cs) == 2 ) i=0; } if( i ) Break; for(i=1,y=y0;y<=y0+dy/4 && i;y++){ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; } if( i ) Break; if( dx<10 ){ for(i=1,y=y0+dy/3;y<=y1-dy/3 && i;y++){ if( num_cross(x0,x1,y,y, box1->p,cs) == 1 ) i=0; } if( i ) Break; } for(i=1,y=y1-dy/4;y<=y1 && i;y++){ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; } if( i ) Break; if( get_bw(x1-dx/3,x1,y0,y0+dy/8,box1->p,cs,1) != 1 ) Break; // ~k if( loop(bp,0, dy/4,dx,cs,0,RI) +loop(bp,0,3*dy/4,dx,cs,0,RI) <2*loop(bp,0, dy/2,dx,cs,0,RI)-2-dx/32 ) { if (dy>=16) { Break; } else ad=98*ad/100; } // ~X Jul09 i=loop(box1->p,x1,y0+ dy/4,x1-x0+1,cs,0,LE); if(i>dx/2) Break; j=loop(box1->p,x1,y0+ dy/2,x1-x0+1,cs,0,LE); x=loop(box1->p,x1,y0+3*dy/8,x1-x0+1,cs,0,LE); if(x>j) j=x; if(j<=i ) Break; i=j; j=loop(box1->p,x1,y1-dy/4,x1-x0+1,cs,0,LE); if(j>=i ) Break; // out_x(box1); // detailed analysis // // a d <= that are main points of K // | / + horizontal zerifes at a,c,d,f // b/e // | \ . 
// c f ya= dy/4;xa=loop(bp,0,ya,dx,cs,0,RI);xa+=loop(bp,xa,ya,dx,cs,1,RI)/2; yc=dy-dy/4;xc=loop(bp,0,yc,dx,cs,0,RI);xc+=loop(bp,xc,yc,dx,cs,1,RI)/2; yb=dy/2; xb=dx-1-loop(bp,dx-1,dy/2,dx,cs,0,LE); for(yd=ye=yf=xe=y=i=0,xf=xd=dx;yxe){ xe=x;ye=dy/2+y; } x =loop(bp,dx-1,dy/2-y,dx,cs,0,LE); if(x>xe){ xe=x;ye=dy/2-y; } #if 0 // removed v0.2.4a2 x =loop(bp,0 ,dy/2+y,dx,cs,0,RI); // middle left border x+=loop(bp,x ,dy/2+y,dx,cs,1,RI); // test 2nd cross x+=loop(bp,x ,dy/2+y,dx,cs,0,RI); if(x8 ){ // example szaka0103 if( xe>5*dx/8 || xb>5*dx/8 ) Break; // ~{\it n} i=loop(bp,xb,yb,xb,cs,1,LE); // thick center? see font22 if( get_line2(xb,yb,xd,yd,bp,cs,100)<95 ) // right up if( get_line2(xb-i/2,yb,xd,yd,bp,cs,100)<95 ) Break; // ImageMagick.convert.textfont=FreeMono-Regular,80 bowed if( get_line2(xe,ye,xf,yf,bp,cs,100)<95 ) ad=99*ad/100; // right down if( get_line2(xe,ye,xf,yf,bp,cs,100)<80 ) Break; // right down xe+=loop(bp,xe,ye,dx,cs,1,RI); if( xe>=xf ) Break; // ~{\it n} } else { if( dy<16 && !hchar ) Break; if( loop(bp,0,1,dy,cs,1,DO)<=3*dx/4 && loop(bp,1,1,dy,cs,1,DO)<=3*dx/4 && loop(bp,2,1,dy,cs,1,DO)<=3*dx/4 ) Break; // ~x } if (loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)<=dx/8){ ad=99*ad/100; /* broken B ? */ if (sdata->holes.num > 0) if (sdata->holes.hole[0].y1 < dy-1-dy/3) Break; // if( num_hole(x0,x1,y0,(y0+2*y1)/3,box1->p,cs,NULL)>0) Break; // broken B } if(box1->m3 && !hchar) ad=99*ad/100; if(box1->m3 && gchar) ad=99*ad/100; // printf(" ok xe=%d",xe); Setac(box1,'K',ad); break; } return box1->c; } static wchar_t ocr0_f(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ (*aa)[4]=sdata->aa, /* the for line ends, (x,y,dist^2,vector_idx) */ ab[8][4], /* special points (x,y,dist^2,vector_idx) */ ad; /* tmp-vars */ /* x=mindist_to_a y=0 "t" 0>..$$. 0>..$$ 0>..$$ end right bow a--..$$ a--.$7. 
y>0 "f" 1>.$..$ 1>.$.. 1>.$$$ start right bow .$7. .$.. .@... .@.. 2>.@@. start upper end .@.. .@.. 2>.$... 2>.$.. 3>$$$$ crossing bar .$.. $$$. 3>$@$$. 3>$@$. $@@$ $@$. .@.. 4>.$... 4>.$.. 4>.$$. lower end .$.. .$.. .@... .@.. .@@. .@.. .@.. .@... .@.. .@@. .@.. .@.. 5>.$... 5>.$.. 5>.$$. lower start .$.. .$.. 6>..... 6>$... 6>.... optional left bow */ // --- test f like t --------------------------------------------------- for(ad=d=100;dx>2 && dy>5;){ // sometimes no hchar! // rewritten for vectors 0.43 int d, i1, i2, i3, i4, i5, i6, i7, i8, i9; // line derivation + corners DBG( wchar_t c_ask='f'; ) // if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80 /* half distance to the center */ d=2*sq(128/4); /* now we check for the upper right end of the h */ if (aa[3][2]>d/2) Break; /* [2] = distance, ~BCDEF... */ if (aa[0][2]>d ) Break; /* upper left end */ /* 9 OOO O 7 O8 O6 1OOOO5 O4 O 2O3 OOOOO */ i1=nearest_frame_vector(box1,aa[0][3],aa[1][3],x0-dx/2,(5*y0+3*y1)/8); /* we need i for 4x6 font, where left side of h-bar is near (x0,y1) */ i =aa[1][3]; if (box1->frame_vector[i][1]frame_vector[i2][0]-x0>dx/2) Break; // ~3 i =nearest_frame_vector(box1, aa[0][3], i2, x1+2*dx, (y0+y1)/2); // MSG(fprintf(stderr,"i %d",i);) if (box1->frame_vector[i ][0] -box1->frame_vector[i9][0]>dx/8) Break; // ~3 if( (box1->dots) ) Break; // Bold-face is gchar if (dy<=box1->m3-box1->m2+1) Break; for(x=0,j=y=2+(3*dy+4)/32;y<=5*dy/8;y++){ // upper cross line min=2 i=loop(bp,0,y,dx,cs,0,RI); if( y>dy/4 && i>5*dx/8 ) break; i=loop(bp,i,y,dx,cs,1,RI); if( i>x ) { x=i;j=y; } if( y<3*dy/4 && y>dy/4 && num_cross(0,dx-1,y ,y ,bp,cs) != 1 && num_cross(0,dx-1,y+1,y+1,bp,cs) != 1 // against noise ) break; } if( y<=5*dy/8 ) Break; y=j;// if( y>dy/2 || ydy/8 && num_cross( 0, (dx+1)/2,i,i,bp,cs) > 0 && num_cross((dx+1)/2,dx-1,i,i,bp,cs) > 0 ) Break; // ~Y if (loop(bp,3*dx/4, 0,dy,cs,0,DO)>dy/8 && 
loop(bp,3*dx/4-1,0,dy,cs,0,DO)>dy/8) Break; // upper bow i=3*dy/4; if (box1->m3 && i>=box1->m3) i=box1->m3-1; if (num_cross(0,dx-1,i,i,bp,cs)!=1) Break; // the middle bar appear in a wide vertical range, get part below for (i1=dx,i2=y,j=y+1;jframe_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0);) ab[7][0]=box1->frame_vector[i3][0]; ab[7][1]=box1->frame_vector[i3][1]; ab[7][3]=i3; if (ab[7][1]-y0<=dy/16) ad=95*ad/100; // ~t // because of the dx,dy scaling the horiz. bar could be nearer to (x1,y0) // as the upper right end of the "t" if (aa[3][0]-x0>3*dx/4 && aa[3][1]-y0>3*dy/16) ad=99*ad/100; // ~t j=loop(bp,0,dy/8,dx,cs,0,RI); // if j>dx/2 we have italic f if ((2*x(j+dx/4)) break; if (iloop(bp,0, 1,dx,cs,0,RI) ) Break; // ~X i=y;j=1; // j used as flag if( num_cross(0,dx-1,0,0,bp,cs)==1 && hchar) //~r if( num_cross(0,dx-1,dy-1,dy-1,bp,cs)!=1 && num_cross(0,dx-1,dy-2,dy-2,bp,cs)!=1 ) Break; // ~* etc. // check for upper bow to right for(y=1;j && y=cs || dx<7) && getpixel(bp,x+1,y )>=cs && getpixel(bp,x ,y-1)< cs && getpixel(bp,x+1,y-1)< cs ) { j=0;break; } } if(j) ad=98*ad/100; // not detected // if( num_hole (x0 , x1 , y0, y1,box1->p,cs,NULL) != 0 ) Break; // ~e if (sdata->holes.num != 0) Break; // ~e for(i1=i2=dx,y=7*dy/8;yi2+dx/4) Break; // ~t ~e if(i1>i2+1) ad=96*ad/100; // ~t ~e if( loop(bp,0,3*dy/4,dx,cs,0,RI)5 && !hchar) if( loop(bp,dx-1,dy/2,dx,cs,0,LE)>3*dx/4 ) if( loop(bp,dx-1,dy-1,dy,cs,0,UP)8 ) if( loop(bp, 0,2*dy/3 ,dx,cs,0,RI)>2*dx/3 || loop(bp, 0,2*dy/3-1,dx,cs,0,RI)>2*dx/3 ) if( loop(bp,dx-1, dy/4 ,dx,cs,0,LE)>2*dx/3 ) Break; // ~5 ~S if (!hchar) if ( get_bw(x0+dx/8,x0+dx/8,y0+dy/4,y1-dy/16,box1->p,cs,2) == 0 && num_cross(x1-dx/4,x1-dx/4,y0,y1,box1->p,cs)!=2 && num_cross(x1-dx/8,x1-dx/8,y0,y1,box1->p,cs)!=2 ) Break; // ~r if (dy>15) if( num_cross(x0,x1,y1-dy/4,y1-dy/4,box1->p,cs)>1 && num_cross(x0,x1,y0+dy/4,y0+dy/4,box1->p,cs)>1 ) Break; // ~H if( dx>4 ) if( loop(bp,dx-1 ,3*dy/4,dx,cs,0,LE)- loop(bp,0 ,3*dy/4,dx,cs,0,RI)>dx/5+1 && 
loop(bp,dx-1-dx/8,dy-1 ,dy,cs,0,UP)=dx/5+1) ad=98*ad/100; // ~E i=loop(bp,dx/8,0,dy,cs,0,DO); if (idy/2) { ad=98*ad/100; // ~E, could also be a "f" with big serifs MSG(fprintf(stderr,"ad=%d",ad);) } if (!gchar) { ad=98*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } } i = loop(bp,dx-1 ,3*dy/4,dx ,cs,0,LE)/2; if (loop(bp,dx-1-i , dy-1,dy/2,cs,0,UP)1 && loop(bp,0, 0,dy/4,cs,0,DO)p,cs,2) == 0) { // white pixels? ad=98*ad/100; // F MSG(fprintf(stderr,"ad=%d",ad);) } if (!hchar) ad=ad*98/100; // d*=100;d/=128 // not 100% ! if (box1->m4>0 && gchar && ad<99 && 8*box1->y1 >= box1->m4*7+box1->m3) ad++; Setac(box1,'f',ad); break; } return box1->c; } static wchar_t ocr0_bB(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test B --------------------------------------------------- for(ad=d=100;dx>2 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='B'; ) //if (sdata->holes.num < 2) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 3) Break; // excludes tiny holes 1810.rnd80 for(i=1,y=y0;yp,cs,1) != 1 ) i=0; if( !i ) Break; for(i=1,y=y1-dy/2;yp,cs,1) != 1 ) i=0; if( !i ) Break; if( get_bw(x1,x1 , y0 , y0 ,box1->p,cs,1) == 1 ) Break; if( num_cross(x0+dx/2, x0+dx/2,y0,y1 ,box1->p,cs) != 3 ) if( num_cross(x1-dx/3, x1-dx/3,y0,y1 ,box1->p,cs) != 3 ) Break; /* --- detect center of lower hole --- */ y = loop(box1->p,x0+dx/2,y1 ,dy,cs,0,UP); if (y>1+dy/8) Break; y+= loop(box1->p,x0+dx/2,y1-y,dy,cs,1,UP); if (y>dy/3) Break; y=y1-y-loop(box1->p,x0+dx/2,y1-y,dy,cs,0,UP)/2; if (yp,x0,y0+ y ,dx,cs,0,RI) > loop(box1->p,x0,y0+dy/4,dx,cs,0,RI)+dx/32 ) if( get_bw(x0,x0,y0,y0,box1->p,cs,1) == 0 ) if( get_bw(x0,x0,y1,y1,box1->p,cs,1) == 0 ) Break; // ~8 i1=loop(box1->p,x0,y0+dy/4,dx,cs,0,RI); i2=loop(box1->p,x0,y0+dy/2,dx,cs,0,RI); i =loop(box1->p,x0,y0+dy/2-dy/ 8,dx,cs,0,RI); if(i>i2) 
i2=i; i =loop(box1->p,x0,y0+dy/2-dy/16,dx,cs,0,RI); if(i>i2) i2=i; i3=loop(box1->p,x0,y1-dy/4,dx,cs,0,RI); if(dy>16 && i3p,x0,y0+ 1 ,dx,cs,0,RI) >= loop(box1->p,x0,y0+ 3 ,dx,cs,0,RI)+dx/32 ) if( loop(box1->p,x0,y0+ 0 ,dx,cs,0,RI) > loop(box1->p,x0,y0+ 3 ,dx,cs,0,RI)+dx/32 ) if( loop(box1->p,x0,y1- 0 ,dx,cs,0,RI) > loop(box1->p,x0,y1- 3 ,dx,cs,0,RI)+dx/32 ) if( loop(box1->p,x0,y1- 1 ,dx,cs,0,RI) > loop(box1->p,x0,y1- 3 ,dx,cs,0,RI)+dx/32 ) Break; // ~8 Aug00 } // if (sdata->holes.num != 2) Break; // if (box1->num_frames != 3) Break; // excludes tiny holes 1810.rnd80 if (sdata->holes.hole[0].y0 < y-1 && sdata->holes.hole[1].y0 < y-1 ) Break; if (sdata->holes.hole[0].y1 > y+1 && sdata->holes.hole[1].y1 > y+1 ) Break; // if( num_hole(0,dx-1,0 ,y+1 ,bp,cs,NULL) != 1 ) Break; // if( num_hole(0,dx-1,y-1,dy-1,bp,cs,NULL) != 1 ) Break; // out_x(box1); for( x=dx,y=dy/6; yp,x0,y0+y,dx,cs,0,RI); if( i>x+dx/9 ) break; if(ix )break; } if( yx) x=i; // allow dust i=loop(bp,0,dy/2+1,dx,cs,0,RI); if (i>x) x=i; if ( loop(bp,0, dy/8,dx,cs,0,RI) +loop(bp,0,7*dy/8,dx,cs,0,RI) > 2*x+1 ) Break; // not konvex! 
if(!hchar){ // ~ fat_a ad=99*ad/100; x =loop(bp,0,dy/4,dx,cs,0,RI); if(loop(bp,0,dy/2,dx,cs,0,RI)>x+dx/8) ad=97*ad/100; } if ( (!hchar) && (dx<=10 || dy<=10) ) ad=97*ad/100; // hchar or good_quality if (gchar) ad=99*ad/100; Setac(box1,'B',ad); break; } // --- test b --------------------------------------------------- for(ad=d=100;dx>3 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='b'; ) //if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 2) Break; // excludes tiny holes 1810.rnd80 for(y=y0;yp,cs,1) != 1 ) Break; if(yp,cs,1) != 1 ) Break; if( get_bw(x1- dx/2, x1 , y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x1- dx/3, x1 , y0 , y0+dy/5,box1->p,cs,1) == 1 ) Break; if( get_bw(x1-4*dx/9, x1 , y0+dy/5, y0+dy/5,box1->p,cs,1) == 1 ) Break; if( num_cross(x0,x1,y0+dy/4 ,y0+dy/4 ,box1->p,cs) > 1 ) // & if( num_cross(x0,x1,y0+dy/4-1,y0+dy/4-1,box1->p,cs) > 1 ) if( dy<16 || num_cross(x0,x1,y0+dy/5 ,y0+dy/5 ,box1->p,cs) > 1 ) Break; // fat b for(i=j=0,y=dy/2;yholes.num != 1) Break; if (sdata->holes.hole[0].y0 < dy/4) Break; if ((sdata->holes.hole[0].y1-sdata->holes.hole[0].y0+1) *(sdata->holes.hole[0].x1-sdata->holes.hole[0].x0+1)*16 < dx*dy) ad=90*ad/100; // hole to small if( num_hole( x0, x1 , y0+dy/4, y1,box1->p,cs,NULL) != 1 ) Break; i=loop(bp,dx-1,dy-1 ,dx,cs,0,LE); j=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); if(j>i) Break; // 2017-08 divided To to Tb ? 
i=loop(bp,0 ,dy/8 ,dx,cs,0,RI); if (i>dx/2) Break; i=loop(bp,i ,dy/8 ,dx,cs,1,RI); j=i; // upper width i=loop(bp,0 ,2*dy/3 ,dx,cs,0,RI); if (i>dx/4) Break; i=loop(bp,i ,2*dy/3 ,dx,cs,1,RI); // lower width DBG( IFV fprintf(stderr,"\nDBG b ij %3d %3d",i,j);) if (2*j=100) return 'b'; break; } return box1->c; } static wchar_t ocr0_dD(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,d,x,y,ya,yb,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test D --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='D'; ) //if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 2) Break; // excludes tiny holes 1810.rnd80 if( get_bw(x0 ,x0+dx/3,y0+dy/2,y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/3,x1 ,y0+dy/2,y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x1 ,x1 ,y0 ,y0+dy/16,box1->p,cs,1) == 1 ) Break; if( get_bw(x1-dx/2,x1 ,y0+dy/4,y0+dy/4 ,box1->p,cs,1) != 1 ) Break; if( num_cross(x0+dx/2,x0+dx/2,y0 ,y1 ,box1->p,cs) != 2 ) if( num_cross(x1-dx/3,x1-dx/3,y0 ,y1 ,box1->p,cs) != 2 ) Break; if( num_cross(x0 ,x1 ,y0+dy/3 ,y0+dy/3 ,box1->p,cs) != 2 && num_cross(x0 ,x1 ,y0+dy/3+1,y0+dy/3+1,box1->p,cs) != 2 ) Break; if( num_cross(x0 ,x1 ,y1-dy/3,y1-dy/3,box1->p,cs) != 2 ) Break; //if (box1->num_frames != 2) Break; // excludes tiny holes 1810.rnd80 //if (sdata->holes.num != 1) Break; if (sdata->holes.hole[0].y0 > dy/3) Break; if (sdata->holes.hole[0].y1 < dy-1-dy/3) Break; // if( num_hole (x0 ,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 ) Break; // test if left edge is straight for(x=0,y=bp->y-1-dy/8;y>=dy/5;y--){ i=loop(bp,0,y,x1-x0,cs,0,RI); if( i+2+dx/16<=x ) break; if( i>x ) x=i; } if (y>=dy/5 ) Break; /* test if right edge is falling */ for(x=dx,y=0;yx-1,y,x1-x0,cs,0,LE); if( i>x+dx/16 ) break; if( iy-1;y>2*dy/3;y--){ 
i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE); if( i>x+dx/16 ) break; if( i2*dy/3 ) Break; if( loop(bp,dx-1,dy-1 ,dx,cs,0,LE) <= loop(bp,dx-1,dy-2-dy/16,dx,cs,0,LE) ) Break; // P y=loop(bp,dx/2,dy-1,dy,cs,0,UP)-1; if (dy>16) y/=2; if ( y>=dy/16 ) { y-=dy/16; if (get_bw(dx/2,dx-1,dy-1-y,dy-1-y,bp,cs,1)==1) Break; // ~A } ya=loop(bp, 0,dy-1,dy,cs,0,UP); yb=loop(bp,dx/16+1,dy-1,dy,cs,0,UP); if (yady/16 && ya>yb) Break; // ~O if (loop(bp, 0,dy-1-dy/16,dx,cs,0,RI)>dx/16) ad=99*ad/100; // O? 0907 if (loop(bp, 0,dy-1-dy/16,dx,cs,0,RI)>= loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)) ad=97*ad/100; // ~O 0907 if ( loop(bp, dx/2, 0,dy,cs,0,DO) -loop(bp, dx/2,dy-1,dy,cs,0,UP) > dy/8 ) ad=97*ad/100; // ~b if (loop(bp, 0, 0,dx,cs,0,RI)>=dx/2 && loop(bp,dx-1,dy-1,dx,cs,0,LE)>=dx/2 && loop(bp, 0,dy/2,dx,cs,0,RI)< 2 ) ad=96*ad/100; // thin O if(box1->dots) ad=ad*94/100; if ( gchar) ad=99*ad/100; if (!hchar) ad=99*ad/100; Setac(box1,'D',ad); break; } // --- test d --------------------------------------------------- for(d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='d'; ) ad=100; //if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 2) Break; // excludes tiny holes 1810.rnd80 if( get_bw(x0 , x0+dx/2, y1-dy/6, y1-dy/9,box1->p,cs,1) != 1 ) Break; if( get_bw(x0 , x0+dx/2, y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2, x1 , y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/4, x1 , y0+dy/8, y0+dy/8,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2, x0+dx/2, y1-dy/4, y1 ,box1->p,cs,1) != 1 ) Break; if(dy>19) // 0..dx/3 dont work on serif d, 0..dx/4 is more safe if( get_bw(x0 , x0+dx/4, y0 , y0+dy/5,box1->p,cs,1) == 1 ) Break; if( get_bw(x0 , x0+dx/4, y0 , y0+dy/6,box1->p,cs,1) == 1 ) Break; if( get_bw(x0 , x0+dx/4, y1-dy/8, y1 ,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2-1,x0+dx/2,y1-dy/8, y1 ,box1->p,cs,1) != 1 ) Break; // ~"A if( loop(bp,bp->x-1, bp->y/4,x1-x0,cs,0,LE) > loop(bp,bp->x-1,3*bp->y/4,x1-x0,cs,0,LE)+1 ) Break; // more 
than dx/8 of width should show two horizontal lines (o-like) for (i=dx/8+1,x=0;x1) Break; // "d as a" patch if (i==1) ad=99*ad/100; // "d as a" patch, serif? for(i=dy/6+1,y=dy/4;y 3 ) i++; // ~al } if( i ) ad=98*ad/100; for(i=dy/8+1,y=0;yholes.num<1) Break; if (sdata->holes.num>1) { if (dx<6) Break; ad=95*ad/100; } // glued j above 8 (4x6 sample) MSG(fprintf(stderr,"hole[0].y0,y1= %d %d",sdata->holes.hole[0].y0,sdata->holes.hole[0].y1);); if ( sdata->holes.hole[0].y0 < dy/4 ) Break; if (dy-sdata->holes.hole[0].y1 > dy/4+1) Break; // glued et // if( num_hole(x0 , x1 , y0+dy/4 , y1 ,box1->p,cs,NULL) !=1 ) Break; if( num_cross(0 ,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs) != 2 ) { // glued al if (dy>15) { Break; } else ad=96*ad/100; } if (!hchar) ad=98*ad/100; if ( gchar) ad=99*ad/100; Setac(box1,'d',ad); break; } return box1->c; } static wchar_t ocr0_F(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test F --------------------------------------------------- for(ad=d=100;dx>2 && dy>4;){ // dx>1 dy>2*dx DBG( wchar_t c_ask='F'; ) //if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80 if( get_bw(x0+dx/2,x0+dx/2,y0,y0+dy/8,box1->p,cs,1) != 1 ) Break; if( get_bw(x0,x0+dx/4,y1-dy/4,y1-dy/4,box1->p,cs,1) != 1 ) Break; if( get_bw(x0,x0+dx/2,y0+dy/4,y0+dy/4,box1->p,cs,1) != 1 ) Break; for (x=0,y=0;yx) x=j; } if (ydx/2 ) i=0; } if( i ) Break; x=loop(bp,0,dy-1-dy/4,dx,cs,0,RI); x=loop(bp,x,dy-1-dy/4,dx,cs,1,RI); // strichdicke for( i=1,y=dy/3; ydx/3 && ((j>2*x && dx>8) || j>x+1)) i=0; } if( i ) Break; y=dy/8; if (y<1) y=1; for( i=1; y=dx/3) { i=0; break; } } if( i ) Break; // check for vertical line on left side for(i=1,y=1;y<=dy/2 && i;y++) if( get_bw(0,dx/2,y,y,bp,cs,1) != 1 ) i=0; if( !i ) Break; 
for(i=1,y=dy/2;ydx/8 // no serif || loop(bp, 0, dy-3,dx,cs,0,RI)<1) break; ad=99*ad/100; } if( get_bw(dx-1-dx/4,dx-1,dy-1-dy/4,dy-1,bp,cs,1) == 1 ) Break; // ~E if( get_bw(dx-1 ,dx-1,0 ,dy/3,bp,cs,1) != 1 ) Break; if( loop(bp,0, bp->y/4,dx,cs,0,RI) < loop(bp,0,3*bp->y/4,dx,cs,0,RI)-1 ) Break; // if( num_hole(x0 , x1 , y0 , y1 ,box1->p,cs,NULL) >0 ) Break; //if (sdata->holes.num > 0) Break; for(i=0,x=dx/4;xy/4,dx,cs,0,RI)-1; if (i>=0 && loop(bp,dy-1,i,dy,cs,0,UP)<=3*dy/4 ) ad=ad*98/100; // check for screen font P i= loop(bp,bp->x-1,bp->y/4,dx,cs,0,LE); if (i<1) { j=i+loop(bp,bp->x-1-i,bp->y/4, dx ,cs,1,LE); j= loop(bp,bp->x-1-j,bp->y/4,3*dy/4,cs,0,DO); if (j<=dy/2) { i=loop(bp,bp->x-1,0,dx,cs,0,LE); ad=ad*98/100; if (i>dx/8) Break; if (i) ad=98*ad/100; } } if (!hchar) if ((box1->m2-box1->y0)*8>=dy) { // ignore bad m1..4 if ( num_cross(2*dx/3,2*dx/3,0,dy-1,bp,cs) < 2 ) ad=90*ad/100; // ~r } if (gchar) ad=99*ad/100; Setac(box1,'F',ad); break; } return box1->c; } static wchar_t ocr0_uU(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i1,i2,i3,i4,i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */ wchar_t bc=UNKNOWN; // --- test uU --------------------------------------------------- // in Mitte so breit wie oben (bei V kontinuierlich schmaler) for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='u'; ) //if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames > 3) Break; // u+"u excludes tiny holes 1810.rnd80 if (aa[0][0]>x0+dx/3 || aa[0][1]>y0+dy/4) Break; // left top if (aa[1][0]>x0+dx/4 || aa[1][1]y0+dy/4) Break; // right top /* V.8x10 @...@ @...@ @...@ @...@ .@.@. ..@.. */ // upper gap from the top 2018-09 i1=nearest_frame_vector(box1,aa[3][3],aa[0][3], x0+dx/2, y1); // lower gap from bottom (handwritten u?) 
i2=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1-dx/4, y1-dy/4); if (box1->frame_vector[i1][1]frame_vector[i2][1]frame_vector[i1][1]frame_vector[i2][1]p,cs) < 2 ) break; if( yi)i=y; if(y1) break; } if( idy/2)?dx/8:0),y,y,bp,cs); // left vert line if( y1 ) i--; // ~{\it v} if( y2) ) { i--; ad=90*ad/100; } if( y>dy/2 && j!=1 ) { i--; ad=98*ad/100; // handwritten u? MSG(fprintf(stderr,"ad=%d",ad);) } } if( !i ) Break; for(i=dy/16+1,y=dy/8;ydy/2 && (j<1 && j>2) ) i--; if( yp,cs,1) != 1 ) i=0; } if( i ) Break; for(i=dx/4+1,x=x0+dx/3;x<=x1-dx/3 && i;x++){ if( get_bw( x, x,y0+dy/3,y1-dy/3,box1->p,cs,3) != 2 ) i--; } if( !i ) Break; for(i=1,x=x0+dx/3;x<=x1-dx/3 && i;x++){ if( get_bw( x, x,y1-dy/2,y1,box1->p,cs,3) == 2 ) i=0; if( get_bw( x, x,y1-dy/3,y1,box1->p,cs,3) == 2 ) { ad=98*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } } if( !i ) Break; if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)==2 && num_cross(dx-dx/2,dx-1,dy-dy/4,dy-dy/4,bp,cs)==1 ) Break; // ~{\it v} i=loop(bp,0,dy-1-dy/16,dx,cs,0,RI); j=loop(bp,0,dy-1-dy/8 ,dx,cs,0,RI); if( i15) if( loop(bp,dx-1,dy/16,dx,cs,0,LE) > loop(bp,dx-1,dy/8 ,dx,cs,0,LE)+1+dx/32 ) Break; // ~bad 0 (thinn) if( hchar && dy>7) if( loop(bp, 0, dy-1,dx,cs,1,RI)==dx && loop(bp,dx-1,3*dy/4,dx,cs,0,LE)>dx/16 && loop(bp, 0,3*dy/4,dx,cs,0,RI)>dx/16 && loop(bp,dx-1, dy/2,dx,cs,0,LE)>dx/16 && loop(bp, 0, dy/2,dx,cs,0,RI)>dx/16 ) Break; // melted ll i=loop(bp, 0,dy-1-dy/8,dx,cs,0,RI); // 2010-10 -2 to -1 j=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); // -//- if ( i>dx/4 && j>dx/4 && i+j>=dx/2) Break; // v if (i+j>=dx/2) ad=97*ad/100; if ( num_cross(0,dx-1,dy/2,dy/2,bp,cs)!=2 ) ad=96*ad/100; // w if ( loop(bp,dx/2,dy-1,dy,cs,0,UP)>dy/16 ) ad=98*ad/100; // w if (ad==100) ad=99; // ToDo: only if lines.wt<100 bc='u'; if (gchar) ad=98*ad/100; if (hchar) bc='U'; if (box1->dots>0) ad=99*ad/100; Setac(box1,bc,ad); break; } return box1->c; } static wchar_t ocr0_micro(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int 
i,j,d,x,y,i2,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test \mu µ MICRO_SIGN -------------------------------------- // in Mitte so breit wie oben (bei V kontinuierlich schmaler) if( gchar && !hchar ) for(ad=d=100;dx>2 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='u'; ) //if (sdata->holes.num > 1) break; /* tolerant against a tiny hole */ if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80 for(y=y0+dy/8;ym3-dy/4;y++) if( num_cross(x0,x1,y,y,box1->p,cs) < 2 ) break; if( ym3-dy/4 ) break; if( get_bw(dx/2,dx/2,3*dy/8,7*dy/8,bp,cs,1)==0 ) break; if( get_bw(dx/2,dx-1,3*dy/8,7*dy/8,bp,cs,1)==0 ) break; for(y=dy/2;y5*dx) break; } if( y>=dy || 2*y>box1->m3+box1->m4) break; i2=y; for(i=0,x=2*dx/8;xi)i=y; if(y1) break; } if( im4-box1->m3)-dy/4 && i;y++){ // 12%+1 Fehler j=num_cross(0,dx/2,y,y,bp,cs); if( y1 ) i--; // ~{\it v} if( y2) ) i--; if( y>dy/2 && j!=1 ) i--; } if( !i ) break; for(i=dy/16+1,y=dy/8;ym4-box1->m3)-dy/4 && i;y++){ // 12%+1 Fehler j=num_cross(dx-dx/2,dx-1,y,y,bp,cs); if( y>dy/2 && (j<1 && j>2) ) i--; if( yp,cs,1) != 1 ) i=0; } if( i ) break; for(i=dx/4+1,x=x0+dx/3;x<=x1-dx/3 && i;x++){ if( get_bw( x, x,y0+dy/4,y1-dy/2,box1->p,cs,3) != 2 ) i--; } if( !i ) break; if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)!=1 ) break; if( num_cross(dx-dx/2,dx-1,dy-dy/2,dy-dy/2,bp,cs)!=1 ) break; if( get_bw( (dx+2)/4,dx-1,dy-2-3*dy/16,dy-1,bp,cs,1) == 1 ) break; if( num_cross(0,dx/4,dy-1,dy-1,bp,cs)!=1 ) break; Setac(box1,MICRO_SIGN,ad); break; } return box1->c; } static wchar_t ocr0_vV(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ wchar_t bc=UNKNOWN; // --- test v ------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ 
// min 3x4, ToDo: replace by vector-code DBG( wchar_t c_ask='v'; ) //if (sdata->holes.num > 0) Break; /* no tolerant against a tiny hole */ if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80 /* V.8x10 @...@ @...@ @...@ @...@ .@.@. ..@.. */ x=loop(bp,dx/2,0,dx,cs,1,RI)+dx/2; // be sure in the upper gap y=loop(bp, x,0,(dy+1)/2,cs,0,DO)-1; // (x,y) should be in the gap if (x>3*dx/4 || yp,cs,1) != 1 ) Break; if( get_bw(x0+x,x1,y0+y,y0+y,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+x,x0+x,y1-dy/2,y1, box1->p,cs,1) != 1 ) Break; if( get_bw(x0+x, x0+x ,y0, y0+dy/3,box1->p,cs,1) == 1 ) // it v? if( get_bw(x0+x+1,x0+x+1,y0, y0+dy/3,box1->p,cs,1) == 1 ) Break; // UVW if(((num_cross( 0,dx/2+1,dy/ 8,dy/ 8,bp,cs)!=1) && (num_cross( 0,dx/2+1,dy/16,dy/16,bp,cs)!=1) // it v && (num_cross(dx/2+1,dx -1,dy/ 8,dy/ 8,bp,cs)!=1)) /* () added on Sep00 */ || ((num_cross( 0,dx-1,dy-1-dy/8,dy-1-dy/8,bp,cs)> 1) && (num_cross( 0,dx-1,dy-1 ,dy-1 ,bp,cs)> 1)) ) Break; // UV if( get_bw(0 ,dx/8,dy-1-dy/6,dy-1,bp,cs,1)==1 ) Break; if( get_bw(dx-1-dx/8,dx-1,dy-1-dy/6,dy-1,bp,cs,1)==1 ) Break; if (!hchar // 2010-10 && loop(bp,0 ,dy/6 ,dx,cs,0,RI) >=loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI) // old Break; && loop(bp,0 ,dy/6 ,dx,cs,0,RI) // 2017-03-23 for 7x7 DOS-v >=loop(bp,0 ,dy-1-dy/4,dx,cs,0,RI) && dy>6 ) Break; if ( hchar && loop(bp,0 , dy/3 ,dx,cs,0,RI) >=loop(bp,0 ,dy-1-dy/3 ,dx,cs,0,RI) && loop(bp,0 ,dy-1-dy/3 ,dx,cs,0,RI) >=loop(bp,0 ,dy-1-dy/3+dy/6,dx,cs,0,RI) && dy>6 ) Break; // 2010-10 if( loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI) >loop(bp,0 ,dy-1-dy/8,dx,cs,0,RI) && loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE) >loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) ) Break; // better OR ? 
if( loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI) >=loop(bp,0 ,dy-1-dy/8,dx,cs,0,RI) && loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE) >=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) ) ad=99*ad/100; // font21 if (!hchar // 2010-10 && loop(bp,dx-1,dy/6 ,dx,cs,0,LE) >=loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE) && loop(bp,dx-1,dy/6 ,dx,cs,0,LE) // 2017-03 add 7x7-DOS-v >=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE) && dy>6 ) Break; x=loop(bp,0,dy-1,dx,cs,0,RI); // 3*x>dx changed to 2*x>dx May2001 JS x=loop(bp,x,dy-1,dx,cs,1,RI); if ( dx>14 && 2*x>dx ) Break; // U if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)==2 && num_cross(dx-dx/2,dx-1,dy-dy/4,dy-dy/4,bp,cs)==2 ) Break; // ~{\it u} #if 0 // measure thickness of lower v i=loop(bp, 0,dy-1-dy/16,dx,cs,0,RI) +loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE); j=loop(bp, 0,dy-1-dy/4 ,dx,cs,0,RI) +loop(bp,dx-1,dy-1-dy/4 ,dx,cs,0,LE); if( box1->m1 && hchar && dy>15 && j>=i-dx/32 ) Break; // ~Y #endif /* V has serifs only on upper site! Y also on bottom, check it. Okt00 */ i=loop(bp, 0, 0,dx,cs,0,RI); i=loop(bp, i, 0,dx,cs,1,RI); i1=i; // thickness upper left i=loop(bp, 0, 1,dx,cs,0,RI); i=loop(bp, i, 1,dx,cs,1,RI); if(i>i1) i1=i; // thiggest i=loop(bp, 0,dy/4,dx,cs,0,RI); i=loop(bp, i,dy/4,dx,cs,1,RI); i2=i; // thickness on 1/4 from up i=loop(bp, 0,dy/4+dy/32,dx,cs,0,RI); i=loop(bp, i,dy/4+dy/32,dx,cs,1,RI); if (i>i2) i2=i; // fat fonts i=loop(bp, 0,dy-1,dx,cs,0,RI); i=loop(bp, i,dy-1,dx,cs,1,RI); i3=i; // thickness, fix dy-1 2013-06 i=loop(bp, 0,dy-2,dx,cs,0,RI); i=loop(bp, i,dy-2,dx,cs,1,RI); if(i>i3) i3=i; // thiggest bottom if( y0 < box1->m2) if( i1-i2 > dx/32+2 && i3-i2 > dx/32+2 ) { /* tmp10/invalid_ogv.jpg 120x145 font may have jpg-artefacts */ DBG( IFV fprintf(stderr,"\nDBG v L%d i123= %d %d %d",__LINE__,i1,i2,i3); ) Break; // ~serif_Y } if( y0 < box1->m2 ) // uppercase V ? 
if( i1-i2 < dx/32+2 ) /* no serif detected */ if( num_cross(0,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs)==1 ){ j=loop(bp, 0,dy-1-dy/4 ,dx,cs,0,RI); j=loop(bp, j,dy-1-dy/4 ,dx,cs,1,RI); if (jloop(bp,0 ,dy-1 ,dx,cs,0,RI) ) ad=96*ad/100; if (num_cross(0,dx-1, dy/2, dy/2,bp,cs)==1 && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)> 1) Break; // 5x8 * Jul09 if (gchar) ad=99*ad/100; bc='v'; if( hchar ) bc='V'; Setac(box1, bc, ad); break; } return box1->c; } static wchar_t ocr0_rR(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */ // --- test r ------- for(ad=d=100;dy>3 && dx>1;){ // dy>dx, 4x6 font, dx=2 smallest prop-font DBG( wchar_t c_ask='r'; ) if (sdata->holes.num > 0 && ( sdata->holes.hole[0].y1 > dy/2 // tiny hole in upper left || sdata->holes.hole[0].x1 > dx/2 ) // is tolerated, ~Pp ) Break; /* tolerant against a tiny hole */ if( 2*dym3-box1->m1) Break; if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<=dx/8 ) Break; x= loop(bp,dx-1,dy/2,dx,cs,0,LE); if (x<=dx/2) ad=99*ad/100; // ~t if (loop(bp,dx-1-x/2,0,dy,cs,0,DO)>dy/8) ad=99*ad/100; // ~t if( dx>4 ) if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<=dx/8+2 ) Break; // ~v Jun00 i=dy-(dy+20)/32; // ignore dust on the ground for( y=4*dy/8; y3*dx/8) break; i2= loop(bp,dx-1,y,dx,cs,0,LE); if(i1>i2) break; if( (i1+(dx-i2 -1))/2 >= 4*dx/8 ) break; // mass middle should be left } if (y5*dx/8 // not a C && get_bw(dx-1-dx/8,dx-1,dy-1-dy/4,dy-1,bp,cs,1) ==1 ) Break; if( loop(bp, 0,5*dy/8,dx,cs,0,RI)<=dx/8 && loop(bp,dx-1,5*dy/8,dx,cs,0,LE)>=5*dy/8 && loop(bp,dx/2, dy-1,dy,cs,0,UP)<=dy/8 ) Break; // ~c if( loop(bp, 0,3*dy/8,dx,cs,0,RI) > loop(bp,dx-1,3*dy/8,dx,cs,0,LE)+dx/8 ) { if( loop(bp, 0, dy/8,dx,cs,0,RI)3*dx/4 ) Break; // ~i if( loop(bp,0,dy/4,dx,cs,0,RI)>3*dx/8 // ~I && get_bw(0,dx/8,0,dy/4,bp,cs,1) 
==1 ) Break; if( num_cross(0,dx-1,dy/2, dy/2 ,bp,cs)!=1 && num_cross(0,dx-1,dy/2+1,dy/2+1,bp,cs)!=1 ) Break; // ~n 024a3 // itallic t is sometimes not high enough, look for v-like shape for(y=3*dy/4;y1) {ad=95*ad/100;MSG({})} // ~f if( num_cross(dx/2 ,dx/2 ,0,dy-1,bp,cs)>2 && num_cross(dx/2+1,dx/2+1,0,dy-1,bp,cs)>2 ) Break; // ~f // 2010-10 ocr-a-subset 4x8 '' was detected as bad flying r i1=nearest_frame_vector(box1,aa[3][3],aa[0][3], x0+dx/3, y0+dy/4); // i2=nearest_frame_vector(box1,aa[3][3],aa[0][3], x0+dx/3, y0+dy/4); if (box1->m2 && 2*box1->y0 <= box1->m1 + box1->m2 && 3*box1->y1 <= box1->m2 + 2*box1->m3 && box1->frame_vector[i1][1]-y0==0) { ad=97*ad/100;MSG(fprintf(stderr,"ad %d",ad);) } if (box1->dots) ad=98*ad/100; /* could be modified latin2-r */ if (hchar) ad=96*ad/100; if (gchar) ad=97*ad/100; Setac(box1,'r',ad); break; // not 100% sure! } // --- test R --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='R'; ) // if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 2) Break; // excludes tiny holes 1810.rnd80 if( num_cross(x0,x1,y1-dy/8,y1-dy/8, box1->p,cs) < 2 ) Break; // ~P if (loop(bp, dx/2, dy/4,dy,cs,0,DO)>dy/2) Break; // ~C if (loop(bp, dx/2, 0,dy,cs,0,DO)>dy/8 && loop(bp, dx/2,dy/16,dx,cs,0,RI)=16 ) Break; for(i=1,y=y0+dy/8;y<=y1-dy/8 && i;y++){ // left v-line if( get_bw(x0 , x0+dx/2,y, y,box1->p,cs,1) != 1 ) i=0; } if( !i ) Break; for(i=1,x=x0+3*dx/8;x<=x1-dx/4 && i;x++){ // upper h-line if( get_bw( x, x, y0, y0+dy/4,box1->p,cs,1) != 1 ) i=0; } if( !i ) Break; for(y=0,x=x0+dx/4;x<=x1-dx/4;x++){ // lower h-gap i=loop(box1->p,x,y1,dy,cs,0,UP); /* on small chars bypass possible low left serifs */ if (i>0) { i2=loop(box1->p,x-1,y1-i-1,dy,cs,0,UP); if (i2>1) i+=i2-1; } if (i>y) { y=i; i1=x; } } if( y<=dy/8 ) Break; if (yp,cs) == 2 ) i=0; } if( i ) Break; for(i=1,y=y0;y<=y0+3*dy/8 && i;y++){ // upper 2 vert lines if( num_cross(x0,x1,y,y, box1->p,cs) == 2 
) i=0; } if( i ) Break; for(i=1,y=y0+dy/3;y<=y1-dy/3 && i;y++){ // midle h line if( num_cross(x0,x1,y,y, box1->p,cs) == 1 ) i=0; } if( i ) ad=95*ad/100; /* sometimes there is a small gap */ for(i=1,y=y1-dy/4;y<=y1 && i;y++){ // lower 2 vert lies if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; } if( i ) Break; if( get_bw(x1-dx/3,x1,y0,y0+dy/4,box1->p,cs,1) != 1 ) Break; // pixel ru x=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(x>dx/2) Break; i=x; // ru x=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(x<=i ) Break; i=x; // rc x=loop(bp,dx-1, 5*dy/8,dx,cs,0,LE); if(x>i ) i=x; x=loop(bp,dx-1, 6*dy/8,dx,cs,0,LE); if(x>i ) i=x; x=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); if(x>=i ) Break; // rd i1=loop(bp,0, dy/4,dx,cs,0,RI); // straight i2=loop(bp,0, dy/2,dx,cs,0,RI); i3=loop(bp,0,dy-1-dy/4,dx,cs,0,RI); if( abs(i1+i3-2*i2)>1+dx/16 ) Break; if (dy>15) if (loop(bp,dx-1, dy/2,dx,cs,0,LE)>=loop(bp,dx-1, dy-1,dx,cs,0,LE) && loop(bp,dx-1,3*dy/16,dx,cs,0,LE)>=loop(bp,dx-1,dy/16,dx,cs,0,LE)+dx/8 ) Break; // ~ff if (dy>7) if (loop(bp,dx-1,dy-2 ,dx,cs,0,LE) >loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE)) { ad=98*ad/100; if (loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)==0 && loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE)>0 ) Break; // broken B ?? 
} j=sdata->holes.num; if (j != 1) { i=num_hole (x0,x1,y0,y1-dy/3,box1->p,cs,NULL); // j=num_hole (x0,x1,y0,y1 ,box1->p,cs,NULL); if (i==0) ad=90*ad/100; /* some times there is a small gap */ if (j>1 || j>i) Break; } if (sdata->holes.num < 1) ad=90*ad/100; if (sdata->holes.num==1) if (sdata->holes.hole[0].y1 > 3*dy/4) ad=95*ad/100; // alpha if (!hchar) ad=98*ad/100; if ( gchar) ad=98*ad/100; Setac(box1,'R',ad); break; } return box1->c; } static wchar_t ocr0_m(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; const int hchar=sdata->hchar,gchar=sdata->gchar; int i,d,x,y,i1,i2,i3,i4,i5,i6,i7, handwritten=0, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ (*aa)[4]=sdata->aa, /* corner-points, (x,y,dist^2,vector_idx) */ ad; /* tmp-vars */ // --- test m ------- for(ad=d=100;dx>4 && dy>3;){ DBG( wchar_t c_ask='m'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (sdata->holes.num > 0) ad=96*ad/100; if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80 // added 2018 tmp13/sslmozFP.png molten "ity" d=2*sq(128/4); /* half distance to the center */ // if (aa[3][2]>d/2) Break; /* [2] = distance, ~dj... 
*/ if (aa[0][2]>d/2) Break; /* upper left end */ if (aa[1][2]>d/2) Break; /* lower left end */ if (aa[2][2]>d/2) Break; /* lowerright end */ // search 3 legs and 2 space between, [][3]=vector_index i1=nearest_frame_vector(box1, aa[0][3],aa[2][3], x0, y1); // leg1 i3=nearest_frame_vector(box1, i1,aa[3][3], x1, y1); // leg3 i4=nearest_frame_vector(box1, i1, i3, x0, y0); // gap1 i5=nearest_frame_vector(box1, i1, i3, x1, y0); // gap2 i2=nearest_frame_vector(box1, i4, i5, (x0+x1)/2, y1); // leg2 MSG(fprintf(stderr,"i1-5 %d %d %d %d %d",i1,i2,i3,i4,i5);) // 2018-09 ToDo: check essentials of 3 legs if (box1->frame_vector[i1][0] > x0+dx/4) Break; // leg1 not leftmost if (box1->frame_vector[i4][1] > y0+dy/2) Break; // gap1 too low if (box1->frame_vector[i5][1] > y0+dy/2) Break; // gap2 too low if (box1->frame_vector[i5][0] < x0+dx/2) Break; // gap2 too left if (box1->frame_vector[i5][0] < box1->frame_vector[i2][0]) Break; // gap2 more left than leg2 // check right side molten 'nt' of tmp13/sslmozFP.png i6=nearest_frame_vector(box1, i3, aa[3][3], x1+dx/2, y0+dy/8); i7=nearest_frame_vector(box1, i3, i6, x0, y0+dy/2); if (box1->frame_vector[i7][0]frame_vector[i6][0]-dx/8 && hchar) Break; // ad=97*ad/100; MSG(fprintf(stderr,"i1-7 %d %d %d %d %d %d %d ad=%d",\ i1,i2,i3,i4,i5,i6,i7,ad);) // check up side molten 'ity' of tmp13/sslmozFP.png i6=nearest_frame_vector(box1, aa[3][3], aa[0][3], x0+dx/2, y1); if (box1->frame_vector[i6][1]>y0+dy/4 && hchar && gchar) Break; // ad=97*ad/100; x =loop(bp,dx-1,dy/2,dx,cs,0,LE); if(3*x>dx) Break; // ~K y=dy/2; i=num_cross(0,dx-1,y ,y ,bp,cs); if (i!=3) i=num_cross(0,dx-1,y+1,y+1,bp,cs); if (num_cross(0,dx-1,dy/2,dy/2,bp,cs)==1) Break; // 5x8 * Jul09 if (i<3 && i>5) Break; // m ru rn, handwritten m // im or glued.mm cut to nm if (i>3) { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } if (i>=5) { // melted rw ? 
(Oct08 JS) x =loop(bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // 1st v-line x+=loop(bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // 1st gap x+=loop(bp,x,y,dx-x,cs,0,RI); if(x>3*dx/4) Break; i1=x-i1; // 2nd v-line x+=loop(bp,x,y,dx-x,cs,1,RI); if(x>6*dx/8) Break; i2=x;// 2nd gap x+=loop(bp,x,y,dx-x,cs,0,RI); i2=x-i2; // 3th v-line // printf("\nDBG i1,i2 %d %d", i1, i2); if (i1>2*i2) Break; // rw } for (i=0,y=dy-1-dy/8;y>dy/2;y--) { i=num_cross(0,dx-1,y,y,bp,cs); if (i>2) break; } if (i>3) Break; for ( ;y>dy/2;y--) { i=num_cross(0,dx-1,y,y,bp,cs); if (i!=3) break; } if (i>5) Break; y++; i5=y; if (y> dy/2) handwritten=10; if (y>3*dy/4) handwritten=60; /* @@............... @@......,........ @@,...@@@....@@@. @@,,.@@@@..@@@@@, @@@.@@@@@.@@@@@@, @@;@@@@@@@@@;,@@, @@@@@,.@@@@,,,@@@ <- i5 ,@@@...;@@....@@@ .@;...........,@@ ...............@@ i1 i2 i3 i4 */ x =loop(bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // search 1st v-line x+=loop(bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // first gap x+=loop(bp,x,y,dx-x,cs,0,RI); if(x>3*dx/4) Break; i2=x; // 2nd v-line x+=loop(bp,x,y,dx-x,cs,1,RI); if(x>6*dx/8) Break; i3=x; // 2nd gap x+=loop(bp,x,y,dx-x,cs,0,RI); if(x<5*dx/8) Break; i4=x; // 3th v-line if (x>=dx) Break; // missing 3th v-line, ~W MSG(fprintf(stderr,"y=%d x=%d %d %d %d",y,i1,i2,i3,i4);) if( abs((i2-i1)-(i4-i3)) > 2+((i2-i1)+(i4-i3))/4 ) Break; // same gap width? rn if( abs((i2-i1)-(i4-i3)) > 2+((i2-i1)+(i4-i3))/8 ) ad=98*ad/100; // same gap width? rn // the same game for the lower part =>l1 l2 l3 l4 ??? 
i =loop(bp,0,5*dy/8,dx,cs,0,RI); i =loop(bp,i,5*dy/8,dx,cs,1,RI); x =loop(bp,0,dy-dy/32-1,dx,cs,0,RI); x =loop(bp,x,dy-dy/32-1,dx,cs,1,RI); if( x > i+1 ) i=1; else i=0; /* looks like serif m, Okt00 */ for(y=0,x=i1;xy) y=i; } if(yy) y=i; } if(y=dy/2 ) break; if(xi4-i3+dx/16){ for(y=0,x=(i1+i2)/2;xy ) y=i; if( 2*i3 ) Break; // melted WT x=loop(bp,dx-1,dy/2,dx,cs,0,LE); if (x>2 && loop(bp,dx-1-x/2,0,dy,cs,0,DO)dy/2) Break; // N // {\it m} if( loop(bp,1, dy/4,dx,cs,0,RI) >loop(bp,0,7*dy/8,dx,cs,0,RI) ) Setac(box1,'m',98*ad/100); if (handwritten<10){ x =loop(bp,0,dy/4,dx,cs,0,RI); x+=loop(bp,x,dy/4,dx,cs,1,RI); for( ;x=dy/4) ad=99*ad/100; if (i>(dy+2)/4) ad=95*ad/100; if (3*i>dy) Break; } if(xdots) ad=99*ad/100; Setac(box1,'m',ad); if (ad>=100) return 'm'; break; } return box1->c; } static wchar_t ocr0_tT(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,i1,i2,i3,i4,i5,i6, j,d,d2,x,y,yb, hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */ int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test T --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // dx>1 dy>2*dx DBG( wchar_t c_ask='T'; ) // if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80 if (box1->num_frames>1){ if (4*box1->frame_vol[1]frame_vol[0]) Break; // ~? ad=ad*99/100; // ~? ocr-b.png 2010-10-07 } /* half distance to the center */ d=2*sq(128/4); /* square */ /* now we check for the 3 edges of the T, but failes glued tmp13/ssl */ if (aa[0][2]>d) Break; /* square distance to upper left corner */ if (aa[3][2]>d) Break; /* square distance to upper right corner */ if (aa[3][0]-aa[0][0]2*dx/3) Break; /* lower line to long? 
~t 7x9 */ if (aa[1][1]-aa[0][1]dy/8) ad=99*ad/100; if (aa[0][1]-y0>dy/8) ad=99*ad/100; if (aa[2][0]>=aa[3][0]) ad=98*ad/100; // ~t 7x9 if (aa[2][0]>=aa[3][0]-1) ad=99*ad/100; // ~t 7x9 // detect bottom end of vertical line 2018-09 i1=nearest_frame_vector(box1, aa[0][3],aa[2][3], x0, y1+dy/8); i2=nearest_frame_vector(box1, aa[1][3],aa[3][3], x1, y1+dy/8); // detect top end of vertical line i3=nearest_frame_vector(box1, aa[0][3],aa[1][3], x1, y0); i4=nearest_frame_vector(box1, aa[2][3],aa[3][3], x0, y0); // serifen at bottom? if i1==i6 + i2==i7 no serifen, ad*99%? i5=nearest_frame_vector(box1, i3, i1, x1, y1); // left i6=nearest_frame_vector(box1, i2, i4, x0, y1); // right if (box1->frame_vector[i3][0]-x0 > 3*dx/4) Break; // ~7 if (box1->frame_vector[i4][0]-x0 >= dx-dx/8) Break; // ~7 if (box1->frame_vector[i3][1] != box1->frame_vector[i4][1]) { ad=99*ad/100; MSG({}) } // if ( box1->frame_vector[i3][0]-x0 > 2*(x1-box1->frame_vector[i4][0])) Break; // ~7 MSG(fprintf(stderr,"i1-6 %d %d %d %d %d %d ad %d",i1,i2,i3,i4,i5,i6,ad);) // old pixel code ... (ToDo replace) // upper horizontal line i1= loop (bp, dx/8, 0,dy,cs,0,DO); // left side i2= loop (bp,dx-1-dx/8, 0,dy,cs,0,DO); // right side i3= loop (bp, dx/8,i1,dy,cs,1,DO); // left side i4= loop (bp,dx-1-dx/8,i2,dy,cs,1,DO); // right side if (abs(i1-i2)>=dy/8) { ad=ad*99/100;MSG({}) } // ~ ocr-b '?' 
2010-10 if (i1>dy/4 || i2>dy/4) Break; for (x=dx/8;xi1+dy/8 && i>i2+dy/8) break; if (i16 && y>0 && i+j32 && y>1 && i+jd2) d2=j; // found } if (3*d2dx+1 || i+j>=dx || i3*x) break; //~I } if( y3*dx/4) Break; // ~7 i+= loop(bp,i ,dy/4,dx,cs,1,RI);if(i>3*dx/4) Break; if( num_cross(0,dx-1, dy-1, dy-1,bp,cs) != 1 && num_cross(0,dx-1, dy-2, dy-2,bp,cs) != 1 ) Break; if( num_cross(0,dx-1,2*dy/3,2*dy/3,bp,cs) != 1 && num_cross(0,dx-1,2*dy/3,2*dy/3,bp,cs) != 1 ) Break; if (box1->m3 && 2*y1>box1->m3+box1->m4 && loop(bp,0, 0,dy/2,cs,0,DO)>=dy/4 && loop(bp,0,dy-1,dy ,cs,0,UP)<=dy/2) { ad=96*ad/100; MSG({}) }// ~J if (gchar) ad=98*ad/100; if( loop(bp,0,dy-1,dx,cs,0,RI)<=dx/8) { ad=99*ad/100; MSG({}) }// ~J i = loop(bp,0,dy/2,dx,cs,0,RI); // middle of vert. line j = loop(bp,i,dy/2,dx,cs,1,RI); // thickness if( 2*i>/*=*/dx ) { ad=99*ad/100; MSG({}) } if( 2*(dx-j-i)=100) return 'T'; break; } // --- test t --------------------------------------------------- // written t can look like a + or even with missing right side // smallest t found in win-screenshot (prop-font) dx=2 for(ad=d=100;dx>1 && dy>3/*&& dy>=box1->m3-box1->m2-1 2017_7x9*/;){ // sometimes no hchar! DBG( wchar_t c_ask='t'; ) //if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80 if (dy<=box1->m3-box1->m2+1) ad=96*ad/100; // bad line detection? if (box1->num_frames>1){ if (4*box1->frame_vol[1]frame_vol[0]) Break; // ~! dosemu ad=ad*99/100; // ~! 2017_dosemu_4x10 } /* old pixel code ... ToDo17 replace */ if (num_cross(0,dx-1,0 ,0 ,bp,cs) != 1) { // font4x5 *=x+- if (dy<10) Break; ad=98*ad/100; } if (num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1) { // *=x+- if (dy<10) Break; ad=98*ad/100; } for(x=0,yb=j=y=dy/32+3*dy/16;y<5*dy/8;y++)if(y>0){ // upper cross line i=loop(bp,0,y,dx,cs,0,RI); i=loop(bp,i,y,dx,cs,1,RI); if( i>x ) { x=i;yb=j=y; } // hor. 
line i=num_cross(0,dx-1,y ,y ,bp,cs); j=num_cross(0,dx-1,y+1,y+1,bp,cs); if (i>2 && j>2) break; if( y<11*dy/16 && num_cross(0,dx-1,y ,y ,bp,cs) != 1 && ( num_cross(0,dx-1,y+dy/8,y+dy/8,bp,cs) != 1 || dy<13) // against noise ) break; } if( y<4*dy/8 ) Break; if (dy>12 && x>4 && x>dx/2 && yb<=(dy+4)/8) if ( loop(bp,dx-1-3*x/4,yb,dy,cs,1,UP) <=loop(bp,dx-1-1*x/4,yb,dy,cs,1,UP)+1 ) if ( loop(bp,0 ,dy/2,dy,cs,1,UP)>dx/8 ) Break; // ~C if (x=dx && 9*dx>=8*dy) { ad=99*ad/100; MSG({}) } // + if (box1->y1 < box1->m3 - (box1->m3-box1->m2+1)/32 // ToDo && no bottom serif && h-line near (m2+m3)/2, not m2 = "+" && x0+loop(bp,dx/4,0,dy,cs,0,DO) >= (3*box1->m2+box1->m3)/4 && loop(bp,0,dy/4,dx,cs,0,RI) >= dx/2-dx/8 && 10*dx>=8*dy ) { ad=98*ad/100; MSG({}) } // + i=loop(bp,dx-1,0,dx,cs,0,LE); for(y=0;y1) break; i=j; } if( yi ) break; if( y==yb && yb>dy/8) Break; // 2017-07 fix 6x8t on 7x9-font j=loop(bp,0, dy/2,dx,cs,0,RI); j=loop(bp,j, dy/2,dx,cs,1,RI); i=j; // thickness j=loop(bp,0, dy/4,dx,cs,0,RI); j=loop(bp,j, dy/4,dx,cs,1,RI); if (j=loop(bp,dx-1,yb/2,dx,cs,0,LE) ) Break; // ~1 ??? 
j=1; for(y=1;j && y=cs && getpixel(bp,x+1,y )>=cs && getpixel(bp,x ,y-1)< cs && getpixel(bp,x+1,y-1)< cs ) { j=0;break; } } if(!j) Break; if( num_cross(0,dx-1,dy-2,dy-2,bp,cs) == 2 && num_cross(0,dx-1,dy-1,dy-1,bp,cs) == 2 ) Break; // ~* (5er) if( dy>= 16 && loop(bp, 0, 3*dy/4,dx,cs,0,RI) >=loop(bp, 0, dy-2,dx,cs,0,RI) && loop(bp,dx-1, 3*dy/4,dx,cs,0,LE) <=loop(bp,dx-1, dy-2,dx,cs,0,LE) && loop(bp,dx-1, 1,dx,cs,0,LE)+dx/16 loop(bp, 0,3*dy/16,dx,cs,0,RI)+dx/16 || loop(bp,dx-1, 0,dx,cs,0,LE)==0 || loop(bp,dx-1, 1,dx,cs,0,LE)==0) ) ad=96*ad/100; // ~f Jan02 if(dx<8 && dy>12){ // thin f's could easily confound with t x=loop(bp,dx-1,3*dy/16,dx,cs,0,LE); if (x) if (loop(bp,dx-x,0,dy,cs,0,DO)<3*dy/16 && loop(bp, 0, 3*dy/4,dx,cs,0,RI)+1 >=loop(bp, 0, dy-2,dx,cs,0,RI) && loop(bp,dx-1, 3*dy/4,dx,cs,0,LE) <=loop(bp,dx-1, dy-2,dx,cs,0,LE) ) Break; } if (dx>7) if( num_cross( 0,dx-1,2*dy/3,2*dy/3,bp,cs) > 1 && num_cross( 0,dx/2,2*dy/3,2*dy/3,bp,cs) > 0 && num_cross(dx/2,dx-1,2*dy/3,2*dy/3,bp,cs) > 0 ) if (sdata->holes.num > 0) if (sdata->holes.hole[0].y0 > dy/4) Break; // ~6 // if ( num_hole( x0, x1, y0+dy/4, y1, box1->p,cs,NULL) > 0 ) Break; // ~6 if( num_cross(0,dx-1,3*dy/4, 3*dy/4, bp,cs) >= 2 && num_cross(0,dx-1,3*dy/4-1,3*dy/4-1,bp,cs) >= 2 ){ ad=99*ad/100; /* italic t ? */ if (loop(bp,dx/2 ,dy-1,dy,cs,0,UP)>dy/4) Break; // ~h if (loop(bp,dx/2+1,dy-1,dy,cs,0,UP)>dy/4) Break; // ~h } x= loop(bp,dx-1,dy/2,dx,cs,0,LE); i= loop(bp,dx-1,dy/8,dx,cs,0,LE); if (i>x && loop(bp,dx-x,0,dy,cs,0,DO)>=dy/2) ad=90*ad/100; /* ~\ */ x= loop(bp,0, 0,dx,cs,0,RI); if (yb>1) {i= loop(bp,0, 1,dx,cs,0,RI); if (i1) Break; // l // 2010-10 $ (S + vert. 
lines at bottom and top, not crossing the S) i1=nearest_frame_vector(box1, aa[0][3], aa[1][3], x1+2*dx, (y0+y1)/2); i2=nearest_frame_vector(box1, aa[2][3], aa[3][3], x0-2*dx, (y0+y1)/2); if (box1->frame_vector[i1][0]-x0>3*dx/4 && box1->frame_vector[i2][0]-x0< dx/4 && box1->frame_vector[i1][1]-y0 >box1->frame_vector[i2][1]-y0) Break; // this happens quite often, do not be to strong if (!box1->m2) ad=99*ad/100; if (box1->m2) { if (!hchar) ad=99*ad/100; /* some times t is not long enough */ if( y0>=box1->m2-(box1->m2-box1->m1)/4 ) ad=99*ad/100; /* to short */ if( y0>=box1->m2 ) ad=99*ad/100; /* to short */ } if (dx<3) { ad=ad*99/100; // ~ 2x11 ) if (loop(bp,0, 0,dx,cs,0,RI)==0 && loop(bp,0,dy-1,dx,cs,0,RI)==0 && loop(bp,dx-1,(dy+1)/2,dx,cs,0,LE)==0) Break; // ) } if (loop(bp,0, 0,dx,cs,0,RI)<=dx/8 // 2010-09-26 && loop(bp,dx-1,dy/2+1,dx,cs,0,LE)<=dx/8) { ad=ad*98/100; // ~) 3x11 MSG(fprintf(stderr,"ad= %d",ad);) } if (sdata->holes.num > 0) ad=95*ad/100; if (gchar) ad=99*ad/100; if (box1->dots) ad=90*ad/100; Setac(box1,'t',ad); break; } return box1->c; } static wchar_t ocr0_sS(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ wchar_t ac; // --- test sS near 5 --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 (4x6 font) DBG( wchar_t c_ask='s'; ) //if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80 if( num_cross( dx/2, dx/2,0,dy-1,bp,cs)!=3 && num_cross(6*dx/8,2*dx/8,0,dy-1,bp,cs)!=3 // 6x7 fat s 2010-09-25 && dy>4 ) Break; if( num_cross(0,dx-1,dy/2 ,dy/2 ,bp,cs)!=1 && num_cross(0,dx-1,dy/2-1,dy/2-1,bp,cs)!=1 ) Break; // get the upper and lower hole koords y=(dy+2)/4; // 6x7 fat s 2010-09-25 x =loop(bp,0,(dy+2)/4,dx,cs,0,RI); if(x>3*dx/8) Break; /* 
slanted too */ x +=loop(bp,x,(dy+2)/4,dx,cs,1,RI); if(x>5*dx/8) Break; /* fat too */ i1 =loop(bp,x,(dy+2)/4,dx,cs,0,RI); i1=(i1+2*x)/2; // upper center x y=11*dy/16; // not safe for 6x7 fat font x =loop(bp,dx-1 ,y,dx,cs,0,LE); if(x>dx/4) Break; x +=loop(bp,dx-1-x,y,dx,cs,1,LE); if(dx>5 && dy>7 && x>dx/2) Break; if (x>3*dx/4) Break; if(x>dx/2) { ad=98*ad/100; MSG({})} i2 =loop(bp,dx-1-x,y,dx,cs,0,LE); i2=dx-1-(i2+2*x)/2; // upper center x MSG(fprintf(stderr,"~3 upper right gap (i1=%d,dy/4)",i1);) // use i3 temporary for x of left upper border of slanted S i3 =loop(bp,0,(dy+2)/4,dx,cs,0,RI); for( y=dy/4;yi3+dx/8 && loop(bp,x-1,y,dx,cs,0,UP)>dy/8+1) break; // +1 for s font 5x8 Jul09 } if( ydx/8) break; } if(y==dy/2) Break; // Mai00 y=dy/2+loop(bp,0,dy/2,dy/2,cs,1,DO); if (!joined(bp,0,y,i2,11*dy/16,cs)) Break; // ? 6x7 fat s tmp08/gocr0801_bad5 if (sdata->holes.num > 0) if (sdata->holes.hole[0].y0 > (dy+2)/4) Break; // ??? // if( num_hole( x0, x1, y0+dy/4, y1, box1->p,cs,NULL) > 0 ) Break; i1=loop(bp,dx-1,dy-1,dx,cs,0,LE); i2=loop(bp,dx-1,dy-2,dx,cs,0,LE); if (i2-i1 >= dx/4) Break; // ~{ 5x7font i1=loop(bp, 0, 0,dx,cs,0,RI); i2=loop(bp, 0, 1,dx,cs,0,RI); if (i2-i1 >= dx/4) Break; // ~} 5x7font // sS5 \sl z left upper v-bow ? i1=loop(bp, 0,dy/2,dx,cs,0,RI); i1=loop(bp, i1,dy/2,dx,cs,1,RI); if (4*i1>=3*dx) ad=97*ad/100; // ~5 7-segment i1=loop(bp,0, dy/16,dx,cs,0,RI); i2=loop(bp,0,4*dy/16,dx,cs,0,RI); i3=loop(bp,0,7*dy/16,dx,cs,0,RI); if( 2*i2+dx/32 >= i1+i3 ){ if( 2*i2+dx/32 > i1+i3 || dx>9 ) Break; // very small s? 
i1+=loop(bp,i1, dy/16,dx,cs,1,RI); i2+=loop(bp,i2,4*dy/16,dx,cs,1,RI); i3+=loop(bp,i3,7*dy/16,dx,cs,1,RI); if( 2*i2+dx/32 >= i1+i3 ) Break; } for(y=(7*dy+8)/16;y<(5*dy+4)/8;y++){ if( num_cross( 0,dx-1,y ,y ,bp,cs)==2 ) if( num_cross( 0,dx-1,y+1,y+1,bp,cs)==1 ) if( num_cross( 0,dx/4,y,y,bp,cs)==1 ) break; // ~5 } if(y<5*dy/8) Break; // v0.2.4a5 if ( loop(bp, dx-1,dy-2-dy/32,dx,cs,0,LE) > loop(bp, 0, 1+dy/32,dx,cs,0,RI) + dx/4 ) Break; // ~5 Dec00 ac='s'; if (gchar) { ad=98*ad/100; MSG(fprintf(stderr,"gchar=bad");) } if ( loop(bp, dx-1, 0,dx,cs,1,LE) // ToDo: improve > loop(bp, 0,dy-1,dx,cs,1,RI)+dx/8 ) ad=98*ad/100; // ~5 4x5 font if( hchar ){ // (slanted) S but 5 is very similar! check it ac='S'; if ( loop(bp,3*dx/4, 0,dy,cs,1,DO) // ToDo: improve > loop(bp, dx/4,dy-1,dy,cs,1,UP) ) ad=99*ad/100; // ~5 if ( loop(bp, dx-1,dy-1-dy/32,dx,cs,0,LE) > loop(bp, 0, 0+dy/32,dx,cs,0,RI) ) ad=99*ad/100; // ~5 if ( loop(bp, 0,dy-1-dy/32,dx,cs,0,RI) > loop(bp, dx-1, 0+dy/32,dx,cs,0,LE) ) ad=99*ad/100; // ~5 } Setac(box1,ac,ad); break; } return box1->c; } static wchar_t ocr0_gG(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test g --------------------------------------------------- /* some g's have crotchet at upper right end, so hchar can be set */ // ~italic g for(ad=d=100;dx>2 && dy>4;){ // min 3x5 DBG( wchar_t c_ask='g'; ) if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */ if (sdata->holes.num > 2) ad=99*ad/100; if (sdata->holes.num < 1) Break; // ToDo: if 2 holes, loewer hole should be below baseline ~8 if( get_bw(x0+dx/2, x0+dx/2, y1-dy/2, y1,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/4, x1 , y1-dy/4, y1,box1->p,cs,1) != 1 ) Break; // ~p if( get_bw(x0+dx/2, x0+dx/2, y0, y0+dy/2,box1->p,cs,1) != 1 ) Break; if( num_cross(x0+dx/2, x0+dx/2, y0, y1, 
box1->p,cs) < 3 ) if( num_cross(x1-dx/2, x1-dx/2, y0, y1, box1->p,cs) < 3 ) Break; for (i=0;iholes.num;i++){ // check for upper hole if (sdata->holes.hole[i].y1 < 5*dy/8+1) break; } if (i==sdata->holes.num) Break; // no upper hole found if (sdata->holes.num>1){ // 2018-09 check for low hole for (i=0;iholes.num;i++){ if (sdata->holes.hole[i].y0 >= box1->m3-dy/4-y0) break; } if (i==sdata->holes.num) Break; } // no lower hole found 2018-09 nums1 // if( num_hole ( x0, x1, y0, y0+5*dy/8, box1->p,cs,NULL) != 1 ) Break; for(y=dy/4;y=15*dy) Break; // ~B if (num_cross(x1, x1, (y0+y1)/2, y1, box1->p,cs)>1) { ad=98*ad/100; // ~& if (num_cross(x1 , x1 , y0, (y0+y1)/2, box1->p,cs)<1 ) ad=96*ad/100; if (num_cross(x1-1, x1-1, y0, (y0+y1)/2, box1->p,cs)<1 ) ad=95*ad/100; } // looking for a gap for (x=0,y=dy/4;yx) x=i; } // in a good font x is greater dx/2 if (xp,cs) > 2 || num_cross(x0,x1 ,y0+dy/8,y0+dy/8,box1->p,cs) > 2) ad=90*ad/100; if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) > 2 || num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) > 2) ad=90*ad/100; } if( num_cross(0,dx-1,dy/2,dy/2,bp,cs) >2 ) ad=99*ad/100; // ~/o /* test for horizontal symmetry ~8 */ for (y=0;ym4==0) ad=98*ad/100; if ( hchar) ad=96*ad/100; if (!gchar) ad=96*ad/100; // tmp12/nums1.jpg '8' ? 
ad=98*ad/100; Setac(box1,'g',ad); break; } // --- test rundes G --------------------------------------------- for(ad=d=100;dx>3 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='G'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if( get_bw(x0 ,x0+dx/2,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2,x1-dx/4,y0 ,y0+dy/4,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2,x0+dx/2,y1-dy/4,y1 ,box1->p,cs,1) != 1 ) Break; if( get_bw(x0 ,x0+dx/2,y1-dy/3,y1-dy/3,box1->p,cs,1) != 1 ) Break; // ~S for( y=y0+dy/4;yp,cs,1) == 0 ) break; if( y==y1-dy/3 ) Break; // no gap if( num_cross(x0+dx/2 , x0+dx/2 , y0, y, box1->p,cs) != 1 || num_cross(x0+dx/2+1, x0+dx/2+1, y0, y, box1->p,cs) != 1 ) Break; // ~e x=x0; y=y1; turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,UP,ST); // left bow? if( yp,&x,&y,x0,x1,y0,y1,cs,LE,ST); if( xp,&x,&y,x0,x1,y0,y1,cs,ST,LE); if( xp,&x,&y,x0,x1,y0,y1,cs,LE,ST); turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,LE); if( xp,&x,&y,x0,x1,y0,y1,cs,RI,UP); // upper end right midle if( x<=x1 ) Break; if( yy1-dy/4 ) Break; x=x1-dx/3;y=y1; // follow left C-bow, filter S turmite(box1->p,&x,&y,x0,x1,y0+dy/3,y1,cs,LE,UP); // w=LE b=UP // MSG(fprintf(stderr,"xy= %d %d",x-x0,y-y0);) if( y>y0+dy/3+1 ) Break; /* leave box below for S or on top for CG */ /* if (yp,&x,&y,x0,x1,y0 ,y1,cs,RI,UP); MSG(fprintf(stderr,"xy= %d %d",x-x0,y-y0);) if( y>y0 ) Break; if (sdata->holes.num > 0) Break; // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) > 0 ) Break; if( dx>4 && dy>6){ // no (<[ for(i=1,y=0;i && y Z if( xi ) i=x; } if( yi){ i=x;i1=y; } } if( i1<=dy/4 || i1>=dy-dy/4 ) Break; // around the middle ? 
// check from above for gap and left vertical line (~S) x =loop(bp,0,i1,dx ,cs,0,RI); x+=loop(bp,x,i1,dx-x,cs,1,RI); // left vertical bow x+=loop(bp,x,i1,dx-x,cs,0,RI); if (x>=dx) ad=90*ad/100; MSG(fprintf(stderr,"h-bar y dx %d %d ad= %d",i1,i,ad);) i=1; // Mar06: adapted to 4x6 font for(x=dx/2;x=cs && getpixel(bp,x+1,y )< cs && getpixel(bp,x+1,y-1)< cs && getpixel(bp,x ,y-1)< cs ) { i=0;break; } } if(i) ad=95*ad/100; // ~C if(!hchar) ad=98*ad/100; if( gchar) ad=98*ad/100; Setac(box1,'G',ad); break; } // --- test \it g like 9 ---------------------------------------------- for(ad=d=100;dx>2 && dy>4;){ // dx>1 dy>2*dx DBG( wchar_t c_ask='g'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ // ToDo: if 2 holes, loewer hole should be below baseline ~8 if( num_cross(x0+dx/2,x0+dx/2,y0,y1,box1->p,cs) != 3 // pre select && num_cross(x0+dx/4,x1-dx/4,y0,y1,box1->p,cs) != 3 ) Break; for( x=0,i=y=y0+dy/2;y<=y1-3*dy/16;y++){ // suche kerbe j=loop(box1->p,x0,y,dx,cs,0,RI); if( j>2 && j>dx/4 && yp,x0+j-2,y+1,dx,cs,0,RI)-2; if( j>x ) { x=j; i=y; } } if( x<4*dx/8 ) Break; if( num_cross(x0+dx/2,x1,i ,y1,box1->p,cs) != 1 && num_cross(x0+dx/2,x1,i+1,y1,box1->p,cs) != 1 ) Break; if( num_hole(x0,x1,y0,i+1,box1->p,cs,NULL)!=1 ) Break; if( num_hole(x0,x1,i-1,y1,box1->p,cs,NULL)!=0 ) Break; if( loop(box1->p,x0,y1 ,dy,cs,0,RI)>dx/3 && loop(box1->p,x0,y1-1,dy,cs,0,RI)>dx/3) Break; // no q for( x=0,i=y=y0+dy/3;y<=y1-dy/3;y++){ // suche kerbe j=loop(box1->p,x1,y,dx,cs,0,LE); if( j>x ) { x=j; i=y; } } if( x>dx/2 ) Break; // no g i1=loop(bp,dx-1,dy/8 ,dx,cs,0,LE); if(i1>dx/2) Break; i3=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); i2=loop(bp,dx-1,dy/2 ,dx,cs,0,LE); if(i1+i3<2*i2-dx/8) Break; // konvex i1=loop(bp,dx-1,dy/4 ,dx,cs,0,LE); if(i1>dx/2) Break; i3=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); for(y=dy/4;y0){ x--; // robust y=loop(bp,dx-x-1, dy-1,dy,cs,0,UP); if(yp,cs) > 2) ad=90*ad/100; if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) > 2 || 
num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) > 2) ad=90*ad/100; if (box1->m4==0) ad=98*ad/100; if ( hchar) ad=96*ad/100; if (!gchar) { if (box1->m4 - box1->m3 > 2) ad=96*ad/100; else { if (y1 > box1->m3) ad=99*ad/100; else ad=97*ad/100; } } if (ad>99) ad=99; // never be sure to have a 9 Setac(box1,'g',ad); // break; } return box1->c; } // rewritten for vector usage v0.41 static wchar_t ocr0_xX(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; // pix *bp=sdata->bp; // obsolete int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0, x1=box1->x1, y0=box1->y0, y1=box1->y1; // ,cs=sdata->cs; int dx=x1-x0+1, dy=y1-y0+1, /* size */ (*aa)[4]=sdata->aa, /* the for line ends, (x,y,dist^2,vector_idx) */ ad; /* tmp-vars */ wchar_t bc=UNKNOWN; // --- test xX --------------------------------------------------- // rewritten for vectors 0.41 for(ad=d=100;dx>2 && dy>3;){ // min 3x4 int ld, i1, i2, i3, i4; // lien derivation, 4 inner edges DBG( wchar_t c_ask='x'; ) if (sdata->holes.num > 0) Break; /* # */ /* half distance to the center */ d=2*sq(128/4); /* now we check for the 4 ends of the x */ if (aa[0][2]>d) Break; if (aa[1][2]>d) Break; if (aa[2][2]>d) Break; if (aa[3][2]>d) Break; if (aa[3][0]-aa[0][0] dy/16 && aa[3][1]-y0 > dy/16 && y1-aa[1][1] > dy/16 && y1-aa[2][1] > dy/16) Break; // 10x10 ~* (X + vert. line) /* searching for 4 notches between neighbouring ends */ // JS-2010-09 tmp08/0811qemu2_crop (left top above right top?) 
if (box1->m2-box1->m1>2 && 3*(aa[3][1]-aa[0][1])>=2*(box1->m2-box1->m1)) Break; // ~k /* 2009-07: false positive on font4x5 '*' = '-' | 'x' */ /* 2010-08: +/-(0,dy/4) to +/-(dx/4,dy/3) (handwritten x) */ i1=nearest_frame_vector(box1,aa[0][3],aa[1][3],x0+dx, y0+dy/3); i3=nearest_frame_vector(box1,aa[0][3],aa[1][3],x0+dx, y1-dy/3); i2=nearest_frame_vector(box1,i1, i3, x0+dx/4,y0+dy/2); MSG(fprintf(stderr,"left gap i132 %d %d %d",i1,i3,i2);) if (box1->frame_vector[i2][0]<=x0+ dx/8) Break; if (box1->frame_vector[i2][0]<=x0+ dx/4) ad=98*ad/100; i1=nearest_frame_vector(box1,aa[2][3],aa[3][3],x0 ,y0+dy/3); i3=nearest_frame_vector(box1,aa[2][3],aa[3][3],x0 ,y1-dy/3); i2=nearest_frame_vector(box1,i1, i3, x1-dx/4,y0+dy/2); MSG(fprintf(stderr,"right gap i132 %d %d %d",i1,i3,i2);) if (box1->frame_vector[i2][0]>=x1- dx/8) Break; if (box1->frame_vector[i2][0]>=x1- dx/4) ad=98*ad/100; /* only left side */ for (j=i=aa[0][3];i!=aa[1][3];i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[i][0] >=box1->frame_vector[j][0]) j=i; /* notice most right vector */ } if (j==i) Break; /* calculate the distance to the center */ x=box1->frame_vector[j][0]; y=box1->frame_vector[j][1]; i1=j; if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break; if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break; if ( aa[0][0]+aa[1][0]-2*x>=0) Break; if ( aa[1][0] >= x ) Break; if ( aa[0][0] > x ) Break; if ( aa[0][0] >= x ) ad=99*ad/100; if (x-x02*sq(1024/4)) Break; /* check if lower left and center point are joined directly */ ld=line_deviation(box1, j, aa[1][3]); MSG(fprintf(stderr," X-1 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) if (ld >2*sq(1024/4)) Break; /* only lower side */ for (j=i=aa[1][3];i!=aa[2][3];i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[i][1] <=box1->frame_vector[j][1]) j=i; /* notice most upper vector */ } if (j==i) Break; /* calculate the distance to the center */ x=box1->frame_vector[j][0]; y=box1->frame_vector[j][1]; i2=j; if 
(abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break; if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break; if ( aa[1][1]+aa[2][1]-2*y<=0) Break; /* check if lower left and center point are joined directly */ ld=line_deviation(box1, aa[1][3], j); MSG(fprintf(stderr," 1-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) if (ld >2*sq(1024/4)) Break; /* check if lower right and center point are joined directly */ ld=line_deviation(box1, j, aa[2][3]); MSG(fprintf(stderr," X-2 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) if (ld >2*sq(1024/4)) Break; /* only right side */ for (j=i=aa[2][3];i!=aa[3][3];i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[i][0] <=box1->frame_vector[j][0]) j=i; /* notice most left vector */ } if (j==i) Break; /* calculate the distance to the center */ x=box1->frame_vector[j][0]; y=box1->frame_vector[j][1]; i3=j; if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break; if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break; if ( aa[2][0]+aa[3][0]-2*x<=0) Break; if ( aa[3][0] <= x ) Break; if ( aa[2][0] < x ) Break; if ( aa[2][0] <= x ) ad=99*ad/100; if (dx-(x-x0)2*sq(1024/4)) Break; /* check if upper right and center point are joined directly */ ld=line_deviation(box1, j, aa[3][3]); MSG(fprintf(stderr," X-3 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) if (ld >2*sq(1024/4)) Break; /* only upper side */ for (j=i=aa[3][3];i!=aa[0][3];i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[i][1] >=box1->frame_vector[j][1]) j=i; /* notice lowest vector */ } if (j==i) Break; /* calculate the distance to the center */ x=box1->frame_vector[j][0]; y=box1->frame_vector[j][1]; i4=j; if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break; if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break; if ( aa[3][1]+aa[0][1]-2*y>=0) Break; /* check if upper left and center point are joined directly */ ld=line_deviation(box1, aa[3][3], j); MSG(fprintf(stderr," 3-X %d %d dist= 
%d/%d",x-x0,y-y0,ld,2*sq(1024/4));) if (ld >2*sq(1024/4)) Break; /* check if lower left and center point are joined directly */ ld=line_deviation(box1, j, aa[0][3]); MSG(fprintf(stderr," X-0 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) if (ld >2*sq(1024/4)) Break; // center crossing of diagonal lines is small? if (box1->frame_vector[i3][0] - box1->frame_vector[i1][0] > dx/2) Break; if (gchar) ad=99*ad/100; bc='x'; if(hchar) bc='X'; Setac(box1,bc,ad); break; } // --- test \it x --------------------------------------------------- #if 0 for(ad=d=99;dx>4 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='x'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if( get_bw(x0,x0+dx/4,y0+dy/2,y0+dy/2,box1->p,cs,1) != 0 ) Break; if( get_bw(x1-dx/4,x1,y0+dy/2,y0+dy/2,box1->p,cs,1) != 0 ) Break; if( num_cross(x0+dx/4,x1-dx/4,y0+dy/2,y0+dy/2, box1->p,cs) != 1 ) Break; if( num_cross(x0,x1,y0+dy/4,y0+dy/4, box1->p,cs) != 3 && num_cross(x0,x1,y0+dy/8,y0+dy/8, box1->p,cs) < 3 ) Break; if( num_cross(x0,x1,y1-dy/4,y1-dy/4, box1->p,cs) != 3 && num_cross(x0,x1,y1-dy/8,y1-dy/8, box1->p,cs) < 3 ) Break; if( gchar ) ad=97*ad/100; if( hchar ) ad=96*ad/100; bc='x'; Setac(box1,bc,ad); break; } #endif return box1->c; } static wchar_t ocr0_yY(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i1,i2, i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad,xa,ya,xb,yb,xc,yc,xd,yd; /* tmp-vars */ int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */ wchar_t bc=UNKNOWN; // --- test italic yY -------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='y'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (sdata->holes.num > 0) ad=97*ad/100; /* half distance to the center, added 2018-09 */ d=2*sq(128/4); /* now we check for the upper right end of the h */ if (aa[3][2]>d/2) Break; /* [2] 
= distance, ~N... */ if (aa[0][2]>3*d/4) Break; /* upper left end, handwritten tolerance */ if (aa[1][2]frame_vector[i1][1]-y0<=(dy+2)/4 && box1->frame_vector[i1][0]-x0<=(dx+2)/4) Break; } if (aa[2][2]<(d+2)/4 && aa[0][2]<(d+2)/4) { // 2018-09 tmp12/rasterchars_small.png ~N i2=nearest_frame_vector(box1,aa[3][3],aa[0][3],x1,y1); // right N-gap? if (y1-box1->frame_vector[i2][1]<=(dy+2)/4 && x1-box1->frame_vector[i2][0]<=(dx+2)/4) Break; } MSG(fprintf(stderr,"i1-2 %d %d ad=%d",\ i1,i2,ad);) if( num_cross(0,dx-1,dy/8,dy/8,bp,cs) < 2 && num_cross(0,dx-1, 1, 1,bp,cs) < 2 ) Break; if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1 && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 1 ) Break; if( num_cross(dx-1,dx-1,0,dy-1,bp,cs) != 1 && num_cross(dx-2,dx-2,0,dy-1,bp,cs) != 1 ) Break; if( num_cross(dx/3,dx/3,dy/4,dy-1,bp,cs) != 2 && num_cross(dx/2,dx/2,dy/4,dy-1,bp,cs) != 2 ) Break; for(yc=y=0,xc=x=dx/4;xy){ yc=y=i;xc=x; } } if( y>12*dy/16 || y<3*dy/8 ) Break; ya=dy/8; xa=xc-loop(bp,xc,ya,dx,cs,0,LE); if(xa< 0) Break; yb=dy/8; xb=xc+loop(bp,xc,yb,dx,cs,0,RI); if(xb>=dx) Break; for(y=dy/8;y6*dx/8) ad=99*ad/100; // why this??? if (loop(bp,dx-1,dy-1,dx,cs,0,LE)<1) Break; // printf(" abcd=%d %d %d %d %d %d %d %d -",xa,ya,xb,yb,xc,yc,xd,yd); if( get_line2(xb,yb,xd,yd,bp,cs,100)<95 ) Break; // if( get_line2(xc,yc,xd,yd,bp,cs,100)<95 ) Break; // printf("ok"); bc='y'; if(gchar && !hchar) bc='y'; else if(hchar && (!gchar || dy<14)) bc='Y'; else ad=98*ad/100; // SMALL-CAPS ??? Setac(box1,bc,ad); break; } // --- test yY --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='y'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ /* half distance to the center, added 2018-09 */ d=2*sq(128/4); /* now we check for the upper right end of the h */ if (aa[3][2]>d/2) Break; /* [2] = distance, ~N... 
*/ if (aa[0][2]>3*d/4) Break; /* upper left end, handwritten tolerance */ if (aa[1][2]frame_vector[i1][1]-y0<=(dy+2)/4 && box1->frame_vector[i1][0]-x0<=(dx+2)/4) Break; } if (aa[2][2]<(d+2)/4 && aa[0][2]<(d+2)/4) { // 2018-09 tmp12/rasterchars_small.png ~N i2=nearest_frame_vector(box1,aa[3][3],aa[0][3],x1,y1); // right N-gap? if (y1-box1->frame_vector[i2][1]<=(dy+2)/4 && x1-box1->frame_vector[i2][0]<=(dx+2)/4) Break; } MSG(fprintf(stderr,"i1-2 %d %d ad=%d",\ i1,i2,ad);) if( get_bw(x0,x0,y1-dy/8,y1,box1->p,cs,1) == 1 ) { if( get_bw(x0,x0+4*dx/8,y0+dy/8,y0+dy/8,box1->p,cs,1) != 1 ) Break; } else { if( get_bw(x0,x0+3*dx/8,y0+dy/8,y0+dy/8,box1->p,cs,1) != 1 ) Break; } if( num_cross(0,dx-1,dy/8,dy/8,bp,cs) != 2 && num_cross(0,dx-1, 1, 1,bp,cs) != 2 ) Break; if( num_cross(dx/2,dx/2,0, 1,bp,cs) != 0 ) Break; if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1 && num_cross(0,dx-1,dy-2-dy/32,dy-2-dy/32,bp,cs) != 1 ) Break; if( num_cross(dx-1,dx-1,0,dy-1,bp,cs) != 1 && num_cross(dx-2,dx-2,0,dy-1,bp,cs) != 1 && num_cross(dx-dx/8-1,dx-dx/8-1,0,dy-1,bp,cs) != 1 ) Break; if( loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)+dx/8+1 // Jul00 < loop(bp, 0,dy-1-dy/8,dx,cs,0,RI) ) Break; for(y=0,x=dx/4;xy) y=i; } if( y>10*dy/16 || y<2*dy/8 ) Break; for(xc=xb=xa=dx,yc=yb=ya=y=0;ydy/8) Break; for(i=dx,yc=y=dy/4;y<3*dy/4;y++){ if( num_cross(0,dx-1,y,y,bp,cs) < 2 ) break; x =loop(bp,dx-1 ,y,dx,cs,0,LE); x+=loop(bp,dx-1-x,y,dx,cs,1,LE); j =loop(bp,dx-1-x,y,dx,cs,0,LE); if(j<=i){ i=j;yc=y;xc=dx-1-x-j/2; } } yc+=dy/16+1; yc+=loop(bp,xc,yc,i,cs,1,DO)/2; xa+= loop(bp,xa ,ya,dx,cs,1,RI)/2; xb=dx-1-loop(bp,dx-1,yb,dx,cs,1,LE)/2; yd=dy-1-dy/8;xd=dx-1-loop(bp,dx-1,yd,dx,cs,0,LE); if(xd>6*dx/8) Break; /* check for serife at lower end */ for (i=0,x=dx-1;ix+dx/16+1) break; /* detect serif */ if (j=5*dy/8 && !gchar) if( get_line2(xa,ya,xd ,yd,bp,cs,100)>95 ) if( get_line2(xb,yb,xd ,yd,bp,cs,100)>95 ) { if (dx>4) { Break; } else ad=ad*98/100; } // ~V xa=loop(bp,0,dy/8,dx,cs,0,RI); xb=loop(bp,0,dy/2,dx,cs,0,RI); 
xc=loop(bp,0,dy-1,dx,cs,0,RI); if( 2*xb< xa+xc ) ad=98*ad/100; // ~V if( 2*xb<=xa+xc ) ad=98*ad/100; if( 2*xb<=xa+xc+1 ) ad=98*ad/100; bc='y'; if ((!gchar) && (!hchar)) ad=98*ad/100; if(y0m2-(box1->m2-box1->m1)/4) { bc='Y'; if(gchar) ad=98*ad/100; } // SMALL-CAPS ??? Setac(box1,bc,ad); break; } return box1->c; } static wchar_t ocr0_zZ(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; int i,j,i1,i2,i3,i4,i5,dbg[9], d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1; int dx=x1-x0+1,dy=y1-y0+1, /* size */ (*aa)[4]=sdata->aa, /* the for line ends, (x,y,dist^2,vector_idx) */ ad; /* tmp-vars */ wchar_t bc=UNKNOWN; // --- test zZ ------- for(ad=d=100;dx>3 && dy>3;){ // dy>dx DBG( wchar_t c_ask='z'; ) /* for debugging purpose */ if (sdata->holes.num > 0) Break; if (box1->num_frames > 1) ad=98*ad/100; /* tolerant, tiny hole */ /* half distance to the center */ d=2*sq(128/4); /* square */ /* now we check for the 4 edges of the z */ if (aa[0][2]>d) Break; /* square distance to upper left corner */ if (aa[1][2]>d) Break; /* square distance to lower left corner */ if (aa[2][2]>d) Break; /* square distance to lower right corner */ if (aa[3][2]>d) Break; /* square distance to upper right corner */ if (aa[3][0]-aa[0][0]dy/8) ad=99*ad/100; if (aa[0][1]-y0>dy/8) ad=99*ad/100; // 2010-10-11 ~ joined ty (tmp08/gocr0801_bad5) // check bottom line to be low i= nearest_frame_vector(box1,aa[1][3],aa[2][3], x0+dx/2, y0); j= box1->frame_vector[i][1]; if (j2*sq(1024/4)) Break; ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100; d=line_deviation(box1, aa[1][3], aa[2][3]); if (d>2*sq(1024/4)) Break; /* search uppermost right > from left side */ i1=nearest_frame_vector(box1,aa[0][3],aa[1][3], x1, y0); x=box1->frame_vector[i1][0]; y=box1->frame_vector[i1][1]; if (y-y0 > 5*dy/8) Break; if (x-x0 < 3*dx/8) Break; if (x-aa[0][0]<=dx/4) Break; // ~lI if (x-aa[0][0]<=dx/3) ad=98*ad/100; // ~lI if (x-aa[0][0]< dx/2) ad=99*ad/100; // ~lI, dx/2 for fat 8x10 font /* 
search most right > ~2 from left side */ i3=nearest_frame_vector(box1,aa[0][3],aa[1][3], x1+2*dx, (y0+y1)/2); MSG(fprintf(stderr,"i23xy= %d %d %d %d ad %d",x-x0,y-y0,\ box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0,ad);) /* dy=7 i3.y=2 for fat screen font (Apr09) */ if ( box1->frame_vector[i3][1]-y0> (dy+2)/4 && box1->frame_vector[i3][0]-x>= 0) Break; if ( box1->frame_vector[i3][1]-y> dy/8+1 // +1 needed for 5x7 Z Jul09 && box1->frame_vector[i3][0]-x>=-dx/8) ad=98*ad/100; if ( box1->frame_vector[i3][1]-y> dy/8+1 && box1->frame_vector[i3][0]-x>= 0) ad=97*ad/100; if ( box1->frame_vector[i3][1]-y> dy/16+1) ad=99*ad/100; // 9x10 ~2 if (box1->frame_vector[i3][0]-aa[0][0] < aa[3][0]-box1->frame_vector[i3][0]) Break; // ~lI if (box1->frame_vector[i3][0]-aa[0][0] <(aa[3][0]-box1->frame_vector[i3][0]-1)*2) ad=98*ad/100; // ~lI /* better test for a bow or peaked angle */ /* upper part of a 2, on a Z a and b should be at c .....$@@@@@@a...c. o1 (o1-a)=(dx+5)^2 =dx^2+10*dx+25 ...$$@@@@@@@@@.... (o1-b)=(dx+1)^2+4^2=dx^2+ 2*dx+18 ..$@@$@@@$@@@@@... ..@@@.....$$@@@@.. ..@@.......@$@@@b. ..$.........$@@@@. .$$..........$@@@. .$...........@@@@. .............@@@@.< .............$@@$. ............$@@@.. ............@@$... ............$@$... --- snip ---- */ /* upper right corner? 
*/ i4=nearest_frame_vector(box1,aa[2][3],aa[0][3], x1+dx, y0); i5=nearest_frame_vector(box1,aa[2][3],aa[0][3], x1, y0-dx); d=sq(box1->frame_vector[i5][0]-box1->frame_vector[i4][0]) +sq(box1->frame_vector[i5][1]-box1->frame_vector[i4][1]); if (d>2*sq(dx/8+1)) Break; if (d>0 && dx<8 ) ad=99*ad/100; // 7x10 2 ~Z if (d>1 && dx<16) ad=98*ad/100; // 9x10 2 ~Z d=2 MSG( fprintf(stderr,"i45xy %d %d d %d ad %d upper right bow?",i4,i5,d,ad); ) /* check if upper left and upper right point are joined directly */ dbg[0]=d=line_deviation(box1, aa[0][3], i1); if (d >2*sq(1024/4)) Break; /* check if lower right and upper left point are joined directly */ dbg[1]=d=line_deviation(box1, i1, aa[1][3]); if (d >2*sq(1024/4)) Break; /* search highest left < from right side 2017-07 ~I 5x9 */ i2=nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y0); x=box1->frame_vector[i2][0]; y=box1->frame_vector[i2][1]; if (y-y0 <= dy/8 && x-x0 <= 5*dx/8) Break; // ~I 2017-07_clean5x9 if (y-y0 <= dy/4 && x-x0 <= 5*dx/8) ad=98*ad/100; // ~I /* search lowest left < from right side */ i2=nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y1); x=box1->frame_vector[i2][0]; y=box1->frame_vector[i2][1]; if (y-y0 < 3*dy/8) Break; if (x-x0 > 5*dx/8) Break; if (x-x0 >= dx/2) ad=99*ad/100; // ~I 2017-07_clean5x9 if (aa[2][0]-x<=dx/4) Break; // ~lI if (aa[2][0]-x<=dx/3) ad=98*ad/100; // ~lI if (aa[2][0]-x< dx/2) ad=99*ad/100; // ~lI, dx/2 for 9x7 fat3 z /* check if upper right and lower left point are joined directly */ dbg[2]=d=line_deviation(box1,i2, aa[3][3]); MSG(fprintf(stderr,"left diag line? 
i2xy %d %d d= %d krit= 2..3*%d",\ x-x0,y-y0,d,sq(1024/4));) if ( d >3*sq(1024/4)) Break; if (dx>9 && d >2*sq(1024/4)) Break; // to strong for 5x7 Z /* check if lower left and lower right point are joined directly */ dbg[3]=d=line_deviation(box1, aa[2][3],i2); if (d >2*sq(1024/4)) Break; MSG( fprintf(stderr," i12 %d %d ad %d", i1, i2, ad); ) /* upper right nick - lower left nick, 0 for fat screen fonts 8x10 */ if (box1->frame_vector[i1][0] -box1->frame_vector[i2][0]frame_vector[i1][0]-x0,box1->frame_vector[i1][1]-y0,\ box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0,\ dbg[0], dbg[1], dbg[2], dbg[3], sq(1024)/16, ad);) ad=(100-dbg[0]/sq(1024/4))*ad/100; ad=(100-dbg[1]/sq(1024/4))*ad/100; ad=(100-dbg[2]/sq(1024/4))*ad/100; ad=(100-dbg[3]/sq(1024/4))*ad/100; if ( gchar) ad=98*ad/100; bc='z'; if( hchar ) bc='Z'; Setac(box1,bc,ad); break; } return box1->c; } static wchar_t ocr0_wW(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,handwritten=0, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad,ya,yb,xa,xb,xc,xd,xe,t1; /* tmp-vars */ wchar_t ac; // ------- test w ~{\it w} --------------- for(ad=d=100;dx>3 && dy>3;){ // dy<=dx DBG( wchar_t c_ask='w'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ // xa xe // \ xc / <=ya connected xa-xb-xc-xd-xe // xb xd <=yb // get two lowest points i3,i4,ya // out_x(box1); // ~ul ~uf // out_x(box1); for(y=dy/8;y< dy/2;y++) if( num_cross(0,dx-1,y,y,bp,cs)< 2 ) break; if(y4) { /* 4x6 is to small */ for(y=dy-1-dy/16;y>3*dy/4;y--) if( num_cross(0,dx-1,y,y,bp,cs)==2 ) break; if(y==3*dy/4) Break; } yb=y; t1=loop(bp,0 ,dy/4,dx,cs,0,RI); t1=loop(bp,t1,dy/4,dx,cs,1,RI); // thickness of line? 
for(i=j=0 ;y> dy/4;y--) if( num_cross(0,dx-1,y,y,bp,cs)==4 ) i++; else if( num_cross(0,dx-1,y,y,bp,cs)>=3 ) j++; if(i+56 || dx>4)) Break; if(i+j==0 && dx<=4){ if (abs(loop(bp, 1,dy-1,dy,cs,0,UP) -loop(bp,dx-2,dy-1,dy,cs,0,UP))>dy/8+1) Break; // 4x6 N if ( ( loop(bp, 1, 0,dy,cs,0,DO)>=dy-2 && loop(bp, 0,dy-1,dy,cs,0,UP)>0) || ( loop(bp,dx-2, 0,dy,cs,0,DO)>=dy-2 && loop(bp,dx-1,dy-1,dy,cs,0,UP)>0)) Break; // 4x6 UV ad=ad*99/100; // 4x6 font MSG(fprintf(stderr,"ad=%d",ad);) } if( num_cross(0,dx-1, 1, 1,bp,cs)< 2 && num_cross(0,dx-1,dy/16,dy/16,bp,cs)< 2 ) Break; x =loop(bp,0 ,yb,dx,cs,0,RI); xb=loop(bp,x ,yb,dx,cs,1,RI);xb=x+xb/2; if(xb>dx/2) Break; x =loop(bp,dx-1 ,yb,dx,cs,0,LE); xd=loop(bp,dx-1-x,yb,dx,cs,1,LE);xd=dx-1-x-xd/2;if(xd<3*dx/8) Break; for(y=0,xc=x=xb+1;xy){xc=x;y=i;} if(dx>4 && !y) Break; ya=dy-1-y; // flat y=loop(bp,xc,ya,dy,cs,1,UP);if(y)y--; if (dy>6 || dx>4) { // ~4x6 font if( num_cross(0 ,xc ,ya-y ,ya-y ,bp,cs)!= 2 && num_cross(0 ,xc ,ya-y/2,ya-y/2,bp,cs)!= 2 ) Break; if( num_cross(xc,dx-1,ya-y ,ya-y ,bp,cs)!= 2 && num_cross(xc,dx-1,ya-y/2,ya-y/2,bp,cs)!= 2 ) Break; } ya-=y/2; x =loop(bp,0 ,1 ,dx,cs,0,RI); xa=loop(bp,x ,1 ,dx,cs,1,RI); if( x+xa>xb ){ // may be, here is a small but thick letter // later add some proofs xa=x+xa/4; } else { xa=x+xa/2; } x =loop(bp,dx-1 ,1 ,dx,cs,0,LE); xe=loop(bp,dx-1-x,1 ,dx,cs,1,LE);xe=dx-1-x-xe/2; MSG( fprintf(stderr,"a-e: %d %d %d %d %d %d %d %d %d %d", xa,1,xb,yb,xc,ya,xd,yb,xe,1);) if (ya94 ) break; if (x==xa+i) Break; // no vert. 
line found if( get_line2(xb,yb-1,xc,ya ,bp,cs,100)<95 && get_line2(xb,yb-1,xc,ya+dy/32,bp,cs,100)<95 && get_line2(xb,yb-1,xc,ya+dy/16,bp,cs,100)<95 ) Break; if( get_line2(xc, ya,xd, yb,bp,cs,100)<95 && get_line2(xc+1,ya,xd, yb,bp,cs,100)<95 ) Break; if( get_line2(xd,yb,xe ,1+dy/16,bp,cs,100)<95 && get_line2(xd,yb,dx-1 ,1+dy/8 ,bp,cs,100)<95 // round w && get_line2(xd,yb,xe+dx/20,1+dy/16,bp,cs,100)<95 ) Break; // if( num_hole(0,dx-1,0,dy-1,bp,cs,NULL) != 0 ) Break; // ~ur MSG(fprintf(stderr,"ad=%d",ad);) for(i=0,y=5*dy/8;yi ) i=x; if( x3 && dy>3;){ // dy<=dx 4x6font (like a H with fat bar) DBG( wchar_t c_ask='w'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ // ~ul ~uf if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)<2 ) Break; if( num_cross(0,dx-1,dy/8,dy/8,bp,cs)<2 ) handwritten=40; if( num_cross(0,dx-1,dy/4,dy/4,bp,cs)<2 ) handwritten=80; for(i=0,y=0;ydx/2) Break; xb=loop(bp,x ,yb,dx,cs,0,RI);xb=x+xb/2; if(xb>dx/2) Break; x =loop(bp,dx-1 ,yb,dx,cs,0,LE); x+=loop(bp,dx-1-x,yb,dx,cs,1,LE); xd=loop(bp,dx-1-x,yb,dx,cs,0,LE);xd=dx-1-x-xd/2;if(xd<3*dx/8) Break; if( num_cross(xb,xd,yb,yb ,bp,cs)!= 1 ) Break; if( num_cross(xb,xb,yb,dy-1,bp,cs)!= 1 ) Break; if( num_cross(xd,xd,yb,dy-1,bp,cs)!= 1 ) Break; if( num_cross(xb,xb, 0,yb ,bp,cs)!= 0 ) Break; if( num_cross(xd,xd, 0,yb ,bp,cs)!= 0 ) Break; // if( num_hole(0,dx-1,0,dy-1,bp,cs,NULL) != 0 ) Break; if (sdata->holes.num != 0) Break; // ~ur for(i=0,y=3*dy/4;yi ) i=x; if( xc; } static wchar_t ocr0_aA(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad,ya; /* tmp-vars */ int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */ // --- test A --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='A'; ) if (sdata->holes.num > 2) Break; /* tolerant against a 
tiny hole */ // first selection (rough sieve) if( get_bw(dx/2 ,dx/2 ,dy-1-dy/8,dy-1,bp,cs,1) == 1 && get_bw(dx/2-1,dx/2-1,dy-1-dy/8,dy-1,bp,cs,1) == 1 ) Break; // ~B ya=0; /* upper end, not 0 for modified A etc. */ if (box1->modifier) for (ya=0;ya=dy/2) ya=0; // already subtracted? if( num_cross(0,dx-1,ya+ 1 ,ya+ 1 ,bp,cs)!=1 // 600dpi && num_cross(0,dx-1,ya+ dy/8 ,ya+ dy/8 ,bp,cs)!=1 && num_cross(0,dx-1,ya+ dy/16 ,ya+ dy/16 ,bp,cs)!=1 && num_cross(0,dx-1,ya+ dy/8+1,ya+ dy/8+1,bp,cs)!=1 ) Break; if( num_cross(0,dx-1, 7*dy/8 , 7*dy/8 ,bp,cs)!=2 && num_cross(0,dx-1, 7*dy/8-1, 7*dy/8-1,bp,cs)!=2 ) Break; if ( num_cross( 0,dx/8,ya+dy/8,ya+0,bp,cs)>0 ) Break; // ~R for(y=ya+dy/8;y 1 ) break; if( y==ya+dy/2 ) Break; i1=y; if (dy>20) i1++; /* get arround some noise fat font */ x =loop(bp,0,i1,dx,cs,0,RI); if(x>3*dx/4) Break; x+=loop(bp,x,i1,dx,cs,1,RI); if(x>3*dx/4) Break; i2=x; x+=loop(bp,x,i1,dx,cs,0,RI); if(x<3*dx/8) Break; i2=(x+i2)/2; // hole (i2,i1) y+=loop(bp,i2,y,dy,cs,1,DO); y+=loop(bp,i2,y,dy,cs,0,DO); if(y>3*dy/4) ad=ad*99/100; if (y>5*dy/6) { MSG(fprintf(stderr,"x,y,i1,i2= %d %d %d %d",x,y,i1,i2);) } if (y>5*dy/6) Break; if( sdata->holes.num != ((box1->modifier==RING_ABOVE)?2:1) || sdata->holes.hole[0].y1-ya >= dy-1-dy/4) Break; // if( num_hole ( x0, x1, y0, y1-dy/4 ,box1->p,cs,NULL) != 1 ) Break; // out_x(box1); i3=0;i4=0; for(x=dx/3;x<2*dx/3;x++){ i4=num_cross(i2,x,y ,dy-1,bp,cs);if(i4<1 || i4>2) i4=num_cross(i2,x,y+dy/16,dy-1,bp,cs);if(i4<1 || i4>2) break; if(i4==1) i3=x; } if(i4<1 || i4>2 || i3==0){ // ToDo: MSG(fprintf(stderr,"x,y,i4,i3= %d %d %d %d",x,y,i4,i3);) Break; } if( get_bw(dx-1-dx/4, dx-1, dy-1-dy/4, dy-1, bp,cs,1) != 1 ) Break; i1=loop(bp,dx-1,ya+ (dy-ya)/4,dx,cs,0,LE); i2=loop(bp,dx-1,ya+ (dy-ya)/2,dx,cs,0,LE); i3=loop(bp,dx-1,dy-1-(dy-ya)/4,dx,cs,0,LE); if( 2*i2-dx/8>i1+i3 ) ad=99*ad/100; /* 6*8 font */ if( 2*i2+dx/4i1+i3 ) Break; i1=loop(bp,0 ,ya+ (dy-ya)/4,dx,cs,0,RI); // linke senkr. 
linie i2=loop(bp,0 ,ya+ (dy-ya)/2,dx,cs,0,RI); i3=loop(bp,0 ,dy-1-(dy-ya)/4,dx,cs,0,RI); if( 2*i2-dx/8>i1+i3 ) ad=98*ad/100; /* 6*8 font */ if( 2*i2+dx/4i1+i3 || i1i3+dx/16) break; if( i1+120) ad=97*ad/100; // italic-a if (!hchar) ad=99*ad/100; // italic-a Setac(box1,'A',ad); break; } // new vector based fat a variant without holes 2010-10-11 // --- test a --------------------------------------------------- for(ad=d=100;dx>2 && dy>4;){ // min 3x5 DBG( wchar_t c_ask='a'; ) if (sdata->holes.num > 1) break; if (box1->num_frames != 2 + box1->dots ) break; /* tmp08/gocr0801_bad5 7x7 7x7qemu .@@@@@.<- .@@@@..<- .@@@@@@ ....@@. ..@@@@@ .@@@@@. @@@@@@@ @@..@@. @@...@@ @@..@@. @@@@@@@ @@..@@. .@@@@@@<- .@@@.@@<- */ if (aa[0][0]>x0+dx/4 || aa[0][1]>y0+dy/4) Break; if (aa[1][0]>x0+dx/4 || aa[1][1]y0+dy/4) Break; // ToDo: check position of 2nd frame (hole) using nearest_x and _y??? // rightmost gap from the left i1=nearest_frame_vector(box1,aa[0][3],aa[1][3], x1, y0+1*dy/3); if (box1->frame_vector[i1][0]<=aa[0][0] || box1->frame_vector[i1][0]<=aa[1][0]) Break; // must! if (box1->frame_vector[i1][0]< x0+dx/3) Break; // very tolerant if (box1->frame_vector[i1][0]< x1-dx/3) { // we allow one exeption where fat lines close the gap (see tmp08) // upper body must at least 2 times thicker than low line i= loop(box1->p,x0+dx/2,y0 ,y1-y0,cs,0,DO); if (i>dy/8) Break; i= loop(box1->p,x0+dx/2,y0+i,y1-y0,cs,1,DO); j= loop(box1->p,x0+dx/2,y1 ,y1-y0,cs,0,UP); if (j>dy/8) Break; j= loop(box1->p,x0+dx/2,y1-j,y1-y0,cs,1,UP); if (j>dy/3) Break; if (i<2*j-dy/16) Break; ad=98*ad/100; } if (box1->frame_vector[i1][1]>y0+dy/2) Break; i2=nearest_frame_vector(box1,i1 ,aa[1][3], x0-dx/2, y0+1*dy/2); MSG(fprintf(stderr,"i12 %d %d y=%d %d", i1, i2, box1->frame_vector[i2][1]-y0, y1-dy/4-1-y0 );) if (box1->frame_vector[i2][0]> x0+(dx+4)/8+1) Break; if (box1->frame_vector[i2][1]> y1-dy/4) Break; // ? for (i=0;inum_frames;i++) if (box1->frame_vol[i]<0) break; if (i>=box1->num_frames || i<1) Break; // no hole? 
hole only? i3= box1->num_frame_vectors[i-1]; i4= box1->num_frame_vectors[i]-1; MSG(fprintf(stderr,"i34 %d %d = hole",i3,i4);) i5= nearest_frame_vector(box1, i3, i4, x0+dx/2, y0); MSG(fprintf(stderr,"i5 %d y %d",i5,box1->frame_vector[i5][1]-y0);) if (box1->frame_vector[i5][1]m2) { if (sdata->gchar) ad=98*ad/100; if (sdata->hchar) ad=98*ad/100; } else ad=99*ad/100; Setac(box1,(wchar_t)'a',ad); if (ad>=100) return 'a'; break; } #if 0 // old-pixel based variant // --- test a ------------------------------------------- // with a open bow above the circle starting // on the right side of the circle for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='a'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if( get_bw(x0 , x0+dx/2, y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/3, x1 , y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/3, x1 , y0+dy/4, y0+dy/4,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2, x0+dx/2, y1-dy/3, y1, box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2, x0+dx/2, y0 , y0+dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/3, x1-dx/3, y0 , y0 ,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/4, x1-dx/2, y1 , y1 ,box1->p,cs,1) != 1 ) if( get_bw(x0+dx/4, x1-dx/3, y1-1 , y1-1 ,box1->p,cs,1) != 1 ) Break; if( get_bw(x0 , x0 , y0+dy/2, y1 ,box1->p,cs,1) != 1 ) if( get_bw(x0+dx/8, x0+dx/8, y0+dy/2, y1 ,box1->p,cs,1) != 1 ) Break; if( loop(bp,3*dx/8,0,dy,cs,0,DO) > 3*dy/16 ) Break; // ~d if( num_cross(0,dx-1,dy/4 ,dy/4 , bp,cs) >2 // ~glued am != an && num_cross(0,dx-1,dy/4+1,dy/4+1, bp,cs) >2 ) Break; for( x=dx/4;xdy/2) break; i=loop(bp,x,dy-1,y1-y0,cs,0,UP); if (i>dy/2) break; } if( xy-1, bp,cs) == 3 ) i--; } if( i ) Break; i1=loop(bp,0, dy/8,dx,cs,0,RI); i3=loop(bp,0,3*dy/4,dx,cs,0,RI); for(y=dy/8+1;y<3*dy/4;y++){ i2=loop(bp,0,y,dx,cs,0,RI);if(2*i2>i1+i3+1) break; } if(y==3*dy/4) Break; // ~6 // ~ s (small thick s), look for vertikal line piece for(x=3*dx/4;xdy/4 ) break; if( x==dx ) Break; if (sdata->holes.num 
!= 1) ad=96*ad/100; else if (sdata->holes.num == 1) if( num_hole ( x0, x1, y0+dy/3, y1 ,box1->p,cs,NULL) != 1 ) Break; // if( num_hole ( x0, x1, y0, y1, box1->p,cs,NULL) != 1 ) Break; if( num_hole ( x0, x1, y0, y1-dy/3 ,box1->p,cs,NULL) != 0 ){ i =loop(bp,0,dy/4,dx,cs,0,RI); i =loop(bp,i,dy/4,dx,cs,1,RI); if(ii) Break; // ~ 8 } /* test for horizontal symmetry ~8 */ for (y=0;ydy/4) Break; // ~ serif d if (hchar) ad=96*ad/100; if (gchar) ad=96*ad/100; Setac(box1,'a',ad); break; } #endif // --- test hand written a --------------------------------------------------- // rarely char, without bow above the circle for(ad=d=100;dx>3 && dy>3;){ // min 4x4 DBG( wchar_t c_ask='a'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2 , x0+dx/2,y1-dy/2 , y1, box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/3 , x0+dx/3,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break; i = loop(bp,dx/2-dx/8, 0 ,dy,cs,0,DO); if (i>dy/4) Break; // top center i+= loop(bp,dx/2-dx/8, i ,dy,cs,1,DO); if (i>dy/2) Break; // top hole i = loop(bp,dx/2-dx/8, i ,dy,cs,0,DO); if (ip,cs,1) == 1 ) Break; if( num_cross(x0+dx/2,x0+dx/2,y0 , y1 ,box1->p,cs) != 2 ) Break; if( num_cross(x0+dx/3,x1-dx/3,y0 , y0 ,box1->p,cs) != 1 ) // AND if( num_cross(x0+dx/3,x1-dx/3,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break; i = loop(bp,dx/2,dy-1 ,dy,cs,0,UP); if (i>dy/3) Break; y = i+loop(bp,dx/2,dy-1-i,dy,cs,1,UP); if (i>dy/2) Break; // normal 'a' has a well separated vertical line right from the circle // but fat 'a' is like a 'o', only bigger on the right side if( num_cross(x0+dx/2-1,x1,y1 ,y1 ,box1->p,cs) < 2 /* 4x6font */ && num_cross(x0+dx/2-1,x1,y1-i,y1-i ,box1->p,cs) < 2 /* 2 or 3 */ && num_cross(x0+dx/2-1,x1,y1-y,y1-y ,box1->p,cs) < 2 ) { if (loop(bp, 0,dy-1-dy/16,dx,cs,0,RI) 
<4*loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)) { Break;} else ad=98*ad/100; } if( num_cross(x0,x1,y0+dy/2 , y0+dy/2,box1->p,cs) < 2 || num_cross(x0,x1,y0+dy/3 , y0+dy/3,box1->p,cs) < 2 ) Break; // Jun00 if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/4,box1->p,cs) != 1 ) if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/4,box1->p,cs) != 1 ) Break; if (sdata->holes.num != 1) if( num_hole(x0,x1-2,y0 ,y1 ,box1->p,cs,NULL) != 1 ) // if( num_hole(x0,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 ) Break; if( num_hole(x0,x1 ,y0+dy/3,y1-1 ,box1->p,cs,NULL) != 0 ) Break; if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<= loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break; if( loop(bp,dx-1,dy-1,x1-x0,cs,0,LE)> dx/4 && loop(bp,dx-1,dy-2,x1-x0,cs,0,LE)> (dx+4)/8 ) ad=97*ad/100; x=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); i=loop(bp,dx-1, dy/4,dx,cs,0,LE); if (abs(x-i)>dx/4) Break; for( x=dx/4;xdy/2) break; i=loop(bp,x,dy-1,y1-y0,cs,0,UP); if (i>dy/2) break; } if( xp,cs) == 1 ) if( num_cross(x0 , x1, y0, y0,box1->p,cs) == 1 ) if( loop(bp,dx-1, 0,y1-y0,cs,0,DO)> dy/4 && loop(bp,dx-1,dy-1,y1-y0,cs,0,UP)> dy/4 ) Break; // ~o if( loop(bp,dx/2,dy-1,y1-y0,cs,0,UP)> dy/4 ) Break; // ~q if (ad>99 && loop(bp,dx-1,dy-1,y1-y0,cs,0,UP)> dy/32 ) ad=98*ad/100; // ~o 2010 if (hchar) ad=96*ad/100; // 2010-10 ~O if (gchar) ad=98*ad/100; // handwritten-a (alpha) Setac(box1,'a',ad); break; } // --- test A_A_WITH_OGONEK 0x0104 Centr.Eur.Font ------------------------- /* not sure if we should move this to a get_CentralEuropean-function */ for(ad=d=100;dx>2 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='A'; ) if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */ // first selection (grobes Sieb) if( get_bw(dx/2,dx/2,dy-1-dy/8,dy-1,bp,cs,1) == 1 ) break; // ~B if( num_cross(0,dx-1, 1 , 1 ,bp,cs)!=1 // 600dpi && num_cross(0,dx-1, dy/8 , dy/8 ,bp,cs)!=1 && num_cross(0,dx-1, dy/16 , dy/16 ,bp,cs)!=1 && num_cross(0,dx-1, dy/8+1, dy/8+1,bp,cs)!=1 ) break; if( num_cross(0,dx-1, dy-1 , dy-1 ,bp,cs)!=1 ) break; if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs)!=2 && 
num_cross(0,dx-1, dy/3 , dy/3 ,bp,cs)!=2 ) break; if ( num_cross( 0,dx/8,dy/8, 0,bp,cs)>0 ) break; // ~R for(y=dy/8;y 1 ) break; if( y==dy/2 ) break; i1=y; if (dy>20) i1++; /* get arround some noise fat font */ x =loop(bp,0,i1,dx,cs,0,RI); if(x>3*dx/4) break; x+=loop(bp,x,i1,dx,cs,1,RI); if(x>3*dx/4) break; i2=x; x+=loop(bp,x,i1,dx,cs,0,RI); if(x<3*dx/8) break; i2=(x+i2)/2; // hole (i2,i1) y+=loop(bp,i2,y,dy,cs,1,DO); y+=loop(bp,i2,y,dy,cs,0,DO); if(y>3*dy/4) ad=ad*99/100; if (y>5*dy/6) break; if( sdata->holes.num != 1 || sdata->holes.hole[0].y1 >= dy-1-dy/4) break; // if( num_hole ( x0, x1, y0, y1-dy/4 ,box1->p,cs,NULL) != 1 ) break; // out_x(box1); i3=0;i4=0; for(x=dx/3;x<2*dx/3;x++){ i4=num_cross(i2,x,y ,dy-1,bp,cs);if(i4<1 || i4>2) i4=num_cross(i2,x,y+dy/16,dy-1,bp,cs);if(i4<1 || i4>2) break; if(i4==1) i3=x; } if(i4<1 || i4>2 || i3==0){ // ToDo: g_debug_A(printf(" A: x,y,i4,i3= %d %d %d %d\n",x,y,i4,i3);) break; } if( get_bw(dx-1-dx/4, dx-1, dy-1-dy/4, dy-1, bp,cs,1) != 1 ) break; /* dy/4 changed to dy/6 because of screenfonts */ /* there are strange fonts, one has a serif on the upper end of A */ if ( num_cross( 0,dx/8,dy/6, 0,bp,cs)>0 ) break; if ( num_cross(dx-1-dx/4,dx-1, 0,dy/6,bp,cs)>0 ) break; i1=loop(bp,dx-1, dy/4,dx,cs,0,LE); i2=loop(bp,dx-1, dy/2,dx,cs,0,LE); i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if( 2*i2+dx/4i1+i3 ) break; i1=loop(bp,0 , dy/4,dx,cs,0,RI); // linke senkr. 
linie i2=loop(bp,0 , dy/2,dx,cs,0,RI); i3=loop(bp,0 ,dy-1-dy/4,dx,cs,0,RI); if( 2*i2+dx/4i1+i3 || i1i3+dx/16) break; if( i1+12c; } static wchar_t ocr0_cC(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad,t1; /* tmp-vars */ wchar_t bc=UNKNOWN; // --- test c,C --------------------------------------------------- for(ad=d=100;dx>2 && dy>2;){ // min 3x4 DBG( wchar_t c_ask='c'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if( get_bw(x0 , x0+dx/3,y0+dy/2, y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2, x0+dx/2,y1-dy/3, y1, box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2, x0+dx/2,y0 , y0+dy/3,box1->p,cs,1) != 1 ) Break; if( num_cross(x0,(x0+x1)/2,(y0+y1)/2,(y0+y1)/2,box1->p,cs) > 1 ) Break; // ~ocr-a-[ for(y=y0+dy/4;yp,cs,1) == 0 ) break; if( y==y0+3*dy/4 ) Break; i1=y; // i1: upper end of right gap // measure thickness of line! 
t1=loop(bp, 0,dy/2,dx,cs,0,RI); t1=loop(bp,t1,dy/2,dx,cs,1,RI); if (t1>dx/2) Break; for(y=i1,i2=0,x=x0+dx/2;xp,x0+dx/2,i1,dy,cs,0,DO); if( i>i2 ) { i2=i; } } if(i2p,x0+5*dx/8,i1,dy,cs,0,UP); i =i1+1-loop(box1->p,x0+4*dx/8,i1,dy,cs,0,UP); if(iy0+ dy/4+t1/2) Break; // highest, i3: highest point below top-line for(y=i1;yp,cs,1) == 1 ) break; if( y-i1p,cs) < 1 ) Break; // ~L if (loop(box1->p,x0,y0+3*dy/4,dx,cs,0,RI)>dx/16) if( num_cross(x0+dx/2,x1,i3 ,y1,box1->p,cs) < 1 && num_cross(x0+dx/2,x1,y1-dy/4,y1,box1->p,cs) < 1 // may fail && num_cross(x1 ,x1,y1-dy/4,y1,box1->p,cs) < 1 ) Break; // ~r i=1; for(x=dx/2;x=cs && getpixel(bp,x+1,y )< cs && getpixel(bp,x+1,y-1)< cs && getpixel(bp,x ,y-1)< cs ) { i=0;break; } } if(!i) ad=95*ad/100; // ~G i=loop(bp,0,dy/2,dx,cs,0,RI); for(y=0;y=dy/4;y--){ x =loop(bp,0,y,dx,cs,0,RI); x+=loop(bp,x,y,dx,cs,1,RI); if(x>i5) i5=x; i =loop(bp,x,y,dx,cs,0,RI); if(ii4+dx/32 ) break; // unusual for c, more a bad e? } if( y>=dy/4 ) Break; if( !hchar ){ // test for e where the middle line is partly removed x= loop(bp,0,dy/2,dx,cs,0,RI); x=x +loop(bp,x,dy/2,dx,cs,1,RI); y=dy/2-loop(bp,x,dy/2,dy,cs,0,UP)-1; i=x +loop(bp,x,y,dx,cs,1,RI); i=i +loop(bp,i,y,dx,cs,0,RI); if( num_cross(x ,x ,1,dy/2,bp,cs) > 1 || num_cross(x+1,x+1,1,dy/2,bp,cs) > 1 ) if( num_cross(i-1,i-1,1,dy/2,bp,cs) > 1 || num_cross(i ,i ,1,dy/2,bp,cs) > 1 ) Break; // ~bad e } if( dy>16 && dy>3*dx && hchar ){ // ~[ x= loop(bp,0, dy/16,dx,cs,0,RI); x=+loop(bp,0,dy-1-dy/16,dx,cs,0,RI); i= loop(bp,0, dy/2 ,dx,cs,0,RI)*2; if( i>=x ) if( num_cross(0,dx-1,dy/4,dy/4,bp,cs) < 2 ) Break; } if( get_bw(x0,x0,y0 ,y1 ,box1->p,cs,2) != 2 && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2 && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2 && get_bw(x1,x1,y0+1,y1-1,box1->p,cs,1) != 1 ) Break; /* ~[ */ x =loop(bp, 0,dy/2,dx,cs,0,RI); i =loop(bp,dx-1,dy/2,dx,cs,0,LE); if( (i7 ) if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8 && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8 && loop(bp,dx-1,dy-1-dy/ 8,dx,cs,0,LE) > 
loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE) && loop(bp,dx-1, dy/ 8,dx,cs,0,LE) > loop(bp,dx-1, dy/16,dx,cs,0,LE) ) Break; // ~( // printf(" hchar=%d i1=%d i2=%d %d\n",hchar,i1-y0,i2-y0,9*dy/16); // ~G without characteristic crotchet if (hchar && dy>15 && dx>7 && i2-y0<9*dy/16 && i1-y0<=dy/4) if ( loop(bp,5*dx/8,i2-y0,dy,cs,0,DO) > 2*dy/8 ){ Setac(box1,'G',90); Break; } if (hchar){ i=1; for(x=dx/2;x=cs && getpixel(bp,x+1,y )< cs && getpixel(bp,x+1,y-1)< cs && getpixel(bp,x ,y-1)< cs ) { i=0;break; } } if (i) ad=98*ad/100; // ~( if (dy>2*dx) ad=99*ad/100; } if( loop(bp,dx-1,dy/2,dx,cs,0,LE) < 6*dx/8 ) ad=98*ad/100; i= loop(bp,dx-1,dy/16,dx,cs,0,LE); j= loop(bp,dx/2,0 ,dy,cs,0,DO); if (i>=dx/2 && j>dy/8 && j>2 && j=3*dx && dy>12) ad=99*ad/100; // ( i= loop(bp,dx-1,dy-1,dy,cs,0,UP); j= loop(bp,dx/2,dy-1,dy,cs,0,UP); if (i==0 && j>dy/8) ad=95*ad/100; // < i= loop(bp,dx-1, 0,dy,cs,0,DO); j= loop(bp,dx/2, 0,dy,cs,0,DO); if (i==0 && j>dy/8) ad=95*ad/100; // < if (loop(bp,0,dy-1-dy/8,dx,cs,0,RI)>= 3*dx/4) ad=98*ad/100; // < if (loop(bp,0,dy-1-dy/8,dx,cs,0,RI)>=(dx+1)/2) ad=98*ad/100; // < if (loop(bp,0, dy/8,dx,cs,0,RI)>=dx/2) ad=98*ad/100; // < if (gchar) ad=98*ad/100; // could happen for 5x7 font bc=((hchar)?'C':'c'); Setac(box1,bc,ad); break; } return box1->c; } static wchar_t ocr0_lL(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i0,i1,i2,i3,i4,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test L --------------------------------------------------- for(ad=d=100;dx>2 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='L'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ i=loop(bp,dx-1,dy/2,dx,cs,0,LE); if (i<3 && dy>8) {Break;} if (ip,x0 ,y,dx,cs,0,RI); j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ){ i=j;i1=y; } } if( i<3*dx/4 ) Break; i1=i; // length of horizontal line // line thickness (i2) i=loop(box1->p,x0 
,y0+dy/2,dx,cs,0,RI); if( i>dx/2 ) Break; j=loop(box1->p,x0+i,y0+dy/2,dx,cs,1,RI); if( i+j>dx/2 ) Break; i2=j; if (loop(bp,dx-1, 0,dx,cs,0,LE)dx/2 && loop(bp, 0,5*dy/8,dx,cs,0,RI)p,x0 ,y,dx,cs,0,RI); if ( j>(dx+2)/4+(y1-dy/4-y)*dx/2/dy ) { i=0; break; } x=loop(box1->p,x0+j,y,dx,cs,1,RI); if( ((x>i2+1 || 4*x<3*i2) && y>y0+dy/8) || 4*x>3*i1 ) i=0; } if( !i ) Break; if( num_cross(0, dx-1-dx/8, dy-1-dy/2, dy-1-dy/2,bp,cs) != 1 ) Break; if( num_cross(0, dx-1 , dy/3 , dy/3,bp,cs) != 1 ) Break; if( num_cross(0, dx-1 , dy/8 , dy/8,bp,cs) != 1 ) Break; if (loop(bp,0,dy-1,dx,cs,0,RI) -loop(bp,0,dy-3,dx,cs,0,RI)>1+dx/16) ad=96*ad/100; // ~c if (loop(box1->p,x0+dx/4,y1,dy,cs,0,UP)>1+dy/16) ad=99*ad/100; // ~4 // tmp08/gocr0801_bad5_FP.jpg 6x9 1 2010-09-25 if (loop(bp,0,dy-2,dx,cs,0,RI) -loop(bp,0,dy-1,dx,cs,0,RI)>dx/4) Break; // ~l1 6x9 font if ( gchar) ad=98*ad/100; if (!hchar) ad=99*ad/100; if (5*dx<2*dy && loop(box1->p,x0,y1,dx,cs,0,RI)>dx/4) ad=99*ad/100; // ~l Setac(box1,'L',ad); break; } // --- test l --------------------------------------------------- // recognize a "l" is a never ending problem, because there are lots of // variants and the char is not very unique (under construction) // --- test italic l --------------------------------------------------- // --- test l ~italic (set flag-italic) -------------------------------- // if unsure d should be multiplied by 80..90% for (ad=d=100; dy>dx && dy>5;) { // min 3x4 DBG( wchar_t c_ask='l'; ) if (box1->dots>0) Break; if (num_cross(0, dx-1,dy/2,dy/2,bp,cs) != 1 || num_cross(0, dx-1,dy/4,dy/4,bp,cs) != 1) Break; if (box1->num_frames>1 && box1->m3) Break; // ~! 2010-10-01 /* 2010-09-24 6x10 1 similar but not ~l sample gocr_codabar.png 6x10=1=~l 6x9=1=~l ..@@..<- .@@@.. @@@...<- ..@@.. @.@... ..@@..< ..@... ..@@.. ..@...< ..@@.. ..@... ..@@.. ..@... ..@@.. ..@... ..@@.. ..@... 
@@@@@@<- @@@@@@<- */ // mesure thickness for (i1=0,i2=dx,y=dy/4;yi1) { i1=j; } // thickest if (j2*i2) Break; if (box1->m3 && dy<=box1->m3-box1->m2) ad=94*ad/100; if (box1->m2-box1->m1>1 && y0>=box1->m2) ad=94*ad/100; for (i0=0,i3=0,y=0;yi3) { i3=j; } // widest space j = loop(bp,j,y,dx,cs,1,RI); if (j>i0) { i0=j;i3=0; } // thickest } if (i0>4*i2 || 3*i3>2*dx) if (loop(bp,dx-1,dy-1,dx,cs,0,LE)>3*dx/8 || loop(bp, 0,dy-1,dx,cs,0,RI)>3*dx/8) Break; // ~7 // detect serifs x =loop(bp,0, 0,dx,cs,0,RI); i3=loop(bp,x, 0,dx,cs,0,RI); x =loop(bp,0, 1,dx,cs,0,RI); x =loop(bp,x, 1,dx,cs,0,RI); if(x>i3) i3=x; x =loop(bp,0,dy-1,dx,cs,0,RI); i4=loop(bp,x,dy-1,dx,cs,0,RI); x =loop(bp,0,dy-2,dx,cs,0,RI); x =loop(bp,x,dy-2,dx,cs,0,RI); if(x>i4) i4=x; if (i3>i1+dx/8+1 && i4>i1+dx/8+1) Break; // ~I for(i=dx,j=0,y=1;yi+1) break; i=x; if( num_cross(0,dx-1,y ,y ,bp,cs)==2 && num_cross(0,dx-1,y+1+dy/32,y+1+dy/32,bp,cs)==2 ) j=1; } if (y3) if ( get_bw(dx-1-dx/8,dx-1,0,dy/6,bp,cs,1) != 1 ) if ( get_bw(dx-1-dx/8,dx-1,0,dy/2,bp,cs,1) == 1 ) Break; if ( get_bw(dx-1-dx/8,dx-1,dy/4,dy/3,bp,cs,1) != 1 ) // large I ??? if ( get_bw(0 ,dx/8,dy/4,dy/3,bp,cs,1) != 1 ) if ( get_bw(dx-1-dx/8,dx-1,0 ,dy/8,bp,cs,1) == 1 ) if ( get_bw(0 ,dx/8,0 ,dy/8,bp,cs,1) == 1 ) ad=ad*97/100; if ( get_bw(dx-1-dx/8,dx-1,dy/2,dy-1,bp,cs,1) != 1 ) // r ??? if ( get_bw(0 ,dx/8,dy/2,dy-1,bp,cs,1) == 1 ) if ( get_bw(dx-1-dx/8,dx-1,0 ,dy/3,bp,cs,1) == 1 ) if ( get_bw(0 ,dx/8,0 ,dy/3,bp,cs,1) == 1 ) Break; MSG( fprintf(stderr,"ad= %d", ad); ) for ( y=1;y<12*dy/16;y++ ) if ( num_cross(0, dx-1, y , y ,bp,cs) != 1 // sure ? && num_cross(0, dx-1, y-1, y-1,bp,cs) != 1 ) break; if ( y<12*dy/16 ) Break; if (dx>3) { for ( y=dy/2;yy-1-5*dy/16;y>=dy/5;y--){ // rechts abfallende Kante/Knick? i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE); if ( i-2-dx/16>=x ) break; if ( i=dy/5 ) Break; // test ob linke Kante gerade for(x=0,y=bp->y-1-dy/5;y>=dy/5;y--){ // rechts abfallende Kante/Knick? 
i=loop(bp,0,y,x1-x0,cs,0,RI); if( i+2+dx/16x ) x=i; } if (y>=dy/5 ) Break; if (box1->m4 && y1m4) if ( get_bw(x0,x1,y1+1,box1->m4+dy/8,box1->p,cs,1) == 1 ) ad=ad*97/100; // unsure !l| i=loop(bp,dx-1,dy/16,dx,cs,0,LE); j=loop(bp,dx-1,dy/2 ,dx,cs,0,LE); if( i>3 && j>3 ) if( get_bw(dx-1-i/2,dx-1-i/2,0,dy/2,bp,cs,1) == 1 ) Break; // ~t for(y=5*dy/8;y8 && loop(bp, 0,3*dy/4,dx,cs,0,RI)>=dx/4 && loop(bp, 0,7*dy/8,dx,cs,0,RI)<=dx/8 && loop(bp,dx-1,3*dy/4,dx,cs,0,LE)<=dx/8 && loop(bp,dx-1,7*dy/8,dx,cs,0,LE)<=dx/8 ) Break; // ~J if ( 2*i3>5*i1 ) // hmm \tt l can look very similar to 7 if ( loop(bp,0,dy/4,dx,cs,0,RI)>dx/2 && get_bw(0,dx/8,0,dy/4,bp,cs,1) == 1 ) Break; // ~7 if ( loop(bp,dx-1,dy/2,dx,cs,0,LE)>dx/2 && get_bw(3*dx/4,dx-1,3*dy/4,dy-1,bp,cs,1) == 1) { if (loop(bp,0,dy-1,dx,cs,0,RI)2*dy) ad=99*ad/100; // ~L if (5*dx>3*dy) ad=99*ad/100; // ~L MSG( fprintf(stderr,"ad= %d", ad); ) } if (!hchar) { // right part (bow) of h is never a l if( get_bw(dx/4,dx/4, 0,dy/4,bp,cs,1) == 1 && get_bw(dx/4,dx/4,dy/2,dy-1,bp,cs,1) == 0 ) Break; } if( dx>3 && dy>3*dx ) if( loop(bp,dx/4,dy-1 ,dy,cs,0,UP)< dy/4 && loop(bp, 0,dy-1-dy/8,dx,cs,0,RI)>=dx/2 && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)<=dx/4 ){ ad=98*ad/100; // ~] if ( loop(bp,dx-1,dy/2,dx,cs,0,LE)==0 ) Break; } for(x=0;xi ) break; } if (x>=loop(bp,0,y+1,dx,cs,0,RI) ) if (loop(bp,0 ,0,dy,cs,0,DO)>1) if (loop(bp,0 ,0,dy,cs,0,DO) -loop(bp,dx/16+1,0,dy,cs,0,DO) < dx/16+1) Break; // ~1 Jul00,Nov00 if (num_cross(0,dx/2,y-1,y-1,bp,cs)==2) Break; // ~1 } if(dx<8 && dy<12){ // screen font i= loop(bp,0,0,dy,cs,0,DO); if ( loop(bp,dx/2,1,dy,cs,1,DO)>=dy-2 && loop(bp,0,dy/2,dx,cs,0,RI)>=2 && i>1 && i= dx) ad=98*ad/100; } if ( get_bw(x1,x1,y0 ,y1 ,box1->p,cs,2) != 2 /* ~] */ && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2 && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2 && get_bw(x0,x0+dx/4,y0+1+dy/16,y1-1-dy/16,box1->p,cs,1) != 1 ) Break; // MSG( fprintf(stderr,"ad= %d", ad); ) i=loop(bp,dx-1,dy/2,dx,cs,0,LE); if ( loop(bp, 0,dy/2,dx,cs,0,RI)>=dx/2 && (i1) 
ad=98*ad/100; // ~] JS-2010-09 add dx>1 if ( get_bw(x0,x0,y0 ,y1 ,box1->p,cs,2) != 2 && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2 && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2 && get_bw(x1-dx/4,x1,y0+1+dy/16,y1-1-dy/16,box1->p,cs,1) != 1 ) Break; /* ~[ */ x =loop(bp, 0,dy/2,dx,cs,0,RI); // konvex/konkav? ~() i =loop(bp,dx-1,dy/2,dx,cs,0,LE); if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8 && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8 && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) < i-dx/8 && loop(bp,dx-1, dy/8,dx,cs,0,LE) < i-dx/8 ) Break; // ~( if( loop(bp, 0,7*dy/8,dx,cs,0,RI) < x-dx/8 && loop(bp, 0, dy/8,dx,cs,0,RI) < x-dx/8 && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) > i+dx/8 && loop(bp,dx-1, dy/8,dx,cs,0,LE) > i+dx/8 ) Break; // ~) i= loop(bp, 0, 0,dy,cs,0,DO); // horizontal line? if(dy>=12 && i>dy/8 && iloop(bp,dx-1, i,dx,cs,0,LE) || loop(bp,dx-1,3*dy/16,dx,cs,0,LE)-dx/8 >loop(bp,dx-1, i+1,dx,cs,0,LE) ) if( loop(bp,dx-1,8*dy/16,dx,cs,0,LE)-dx/8 >loop(bp,dx-1, i,dx,cs,0,LE) || loop(bp,dx-1,8*dy/16,dx,cs,0,LE)-dx/8 >loop(bp,dx-1, i+1,dx,cs,0,LE) ) if( loop(bp, 0,3*dy/16,dx,cs,0,RI)-dx/8 >loop(bp, 0, i,dx,cs,0,RI) || loop(bp, 0,3*dy/16,dx,cs,0,RI)-dx/8 >loop(bp, 0, i+1,dx,cs,0,RI) ) if( loop(bp, 0,8*dy/16,dx,cs,0,RI)-dx/8 >loop(bp, 0, i,dx,cs,0,RI) || loop(bp, 0,8*dy/16,dx,cs,0,RI)-dx/8 >loop(bp, 0, i+1,dx,cs,0,RI) ) Break; // ~t if( loop(bp, 0,i-1,dx,cs,0,RI)>1 && dx<6 ) Break; // ~t if( loop(bp, 0,8*dy/16,dx,cs,0,RI)>dx/8 && loop(bp, 0, i,dx,cs,1,RI)>=dx-1 && loop(bp,dx-1,8*dy/16,dx,cs,0,LE)>dx/8 && loop(bp,dx-1, i-1,dx,cs,0,LE)>dx/8 ) Break; // ~t } // MSG( fprintf(stderr,"ad= %d", ad); ) // if( vertical_detected && dx>5 ) if ( loop(bp,0, 1,dx,cs,0,RI)>=dx/2 && ( loop(bp,0,dy-2,dx,cs,0,RI)<=dx/8 || loop(bp,0,dy-1,dx,cs,0,RI)<=dx/8 ) ) if ( dx>1 && // important for dx/2 below ( loop(bp,dx-1, 0,dx,cs,0,LE)<=dx/8 || loop(bp,dx-1, 1,dx,cs,0,LE)<=dx/8 ) && loop(bp,dx-1,dy-2,dx,cs,0,LE)>=dx/2 ) { ad=98*ad/100; // ~/ MSG( fprintf(stderr,"ad= %d", ad); ) } if (loop(bp,0,dy/4,dx,cs,0,RI)>dx/2 && 
loop(bp,0,7*dy/8,dx,cs,0,RI)j) j=i; // top max width i=loop(bp,0,dy/16+2,dx,cs,0,RI); i=loop(bp,i,dy/16+2,dx,cs,1,RI); if (i>j) j=i; // top max width if (j*4>=dx*3) ad=98*ad/100; // ~z if (j*8>=dx*7) ad=96*ad/100; // ~z MSG( fprintf(stderr,"ad= %d", ad); ) // false z on 7x9font 3x9 l (left vertical + upper stub) } i=loop(bp,0,0,dy,cs,0,DO); if (3*i>dy && 3*i<2*dy && loop(bp,dx-1, 0,dy,cs,0,DO)==0 && loop(bp,dx-1,dy-1,dy,cs,0,UP)==0) { i=loop(bp, 0,dy-1,dy,cs,0,UP); if (3*i>dy && 3*i<2*dy) Break; // ~{} 6x10 ad=ad*99/100; MSG( fprintf(stderr,"ad= %d", ad); ) } // JS-2010-09 badly implemented for (dx=1)-"l"? //MSG( fprintf(stderr,"ad= %d", ad); ) //if ( get_bw(x0,x0,y1,y1,box1->p,cs,2) == 0 ) ad=99*ad/100; //if ( get_bw(x1,x1,y1,y1,box1->p,cs,2) == 0 ) ad=99*ad/100; if (ad==100) ad--; /* I have to fix that: (sample?) .@@@@.<- @@..@@ ....@@ ....@@< ...@@. ..@@@. ..@@.. .@@... @@.... @@@@@@<- */ MSG( fprintf(stderr,"ad= %d", ad); ) if(!hchar) ad=ad*99/100; if( gchar) ad=ad*99/100; // full rectangle? no white dots? I and l should have 98% and 99% // after context correction (JS2010-09) if (ad>98 && get_bw(x0,x1,y0,y1,box1->p,cs,2) == 0) ad=99*ad/100; // 2010-10-01 sample tmp10/barcode_code128_145 if (dx<3 && dy>10 && box1->m4==0) ad=ad*95/100; // just a vertical line? Setac(box1,'l',ad); // if( i<100 ) Break; ???? 
// if( loop(bp,0, 1,dx,cs,0,RI)<=dx/8 // && loop(bp,0,dy/2,dx,cs,0,RI)<=dx/8 // && loop(bp,0,dy-2,dx,cs,0,RI)<=dx/8 ) vertical_detected=1; break; } return box1->c; } static wchar_t ocr0_oO(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i1,i2,i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ wchar_t bc=UNKNOWN; // --- test o,O --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='o'; ) if (sdata->holes.num !=1 ) Break; if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2 , x0+dx/2,y1-dy/2 , y1, box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2 , x0+dx/2,y0+dy/2 , y1-dy/3,box1->p,cs,1) != 0 ) Break; if (sdata->holes.hole[0].y0 > dy/3 || sdata->holes.hole[0].y1 < dy-1-dy/3) Break; if( num_cross(x0+dx/2 ,x0+dx/2 ,y0, y1 ,box1->p,cs) != 2 && num_cross(x0+dx/2+1,x0+dx/2+1,y0, y1 ,box1->p,cs) != 2 ) Break; if( num_cross(x0+dx/3,x1-dx/4,y0 , y0 ,box1->p,cs) != 1 ) // AND if( num_cross(x0+dx/3,x1-dx/4,y0+1 , y0+1,box1->p,cs) != 1 ) Break; if( num_cross(x0+dx/4,x1-dx/3,y1 , y1 ,box1->p,cs) != 1 ) // against "rauschen" if( num_cross(x0+dx/4,x1-dx/3,y1-1 , y1-1,box1->p,cs) != 1 ) Break; if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; if( loop(bp,0 ,0 ,x1-x0,cs,0,RI) -loop(bp,0 ,2 ,x1-x0,cs,0,RI)<=dx/16 ) ad=99*ad/100; if( loop(bp,0 ,0 ,x1-x0,cs,0,RI) -loop(bp,0 ,2 ,x1-x0,cs,0,RI)<=dx/8 ) ad=98*ad/100; if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)< loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break; MSG(fprintf(stderr,"ad 
%d",ad);) x=loop(bp,dx-1,dy-1-dy/3,x1-x0,cs,0,LE); // should be minimum for( y=dy-1-dy/3;ydx/8 ) if( loop(bp,0 , dy/16,dx,cs,0,RI)dx/8 ) if( loop(bp,0 ,dy-1-dy/16,dx,cs,0,RI)p,cs,1) == 0 && get_bw(x1-dx/32,x1,y1-dy/32,y1,box1->p,cs,1) == 0 // && ( get_bw(x0,x0+dx/32,y0,y0+dy/32,box1->p,cs,1) == 1 && ( get_bw(0,dx/32,0,dy/32,bp,cs,1) == 1 || get_bw(x0,x0+dx/32,y1-dy/32,y1,box1->p,cs,1) == 1 ) ) Break; // ~D // search lowest inner white point, set i=y itallic a for(y=dy,j=x=0+dx/8;x 1 ) ad=99*ad/100; // ~a \it a MSG(fprintf(stderr,"i12 %d %d ad %d",i1,i2,ad);) for(y=0;y 2 ) ad=98*ad/100; // ~a \it a MSG(fprintf(stderr,"i12 %d %d ad %d",i1,i2,ad);) for(y=i1+dy/8;y 2 ) ad=98*ad/100; // 0 Q ? MSG(fprintf(stderr,"i12 %d %d ad %d",i1,i2,ad);) if (loop(bp,dx-1,dy-1,x1-x0,cs,0,LE)m1-y0,box1->m4-y0);) if (loop(bp,dx-1,dy-1-dy/8,x1-x0,cs,0,LE)+1+dx/16 dy/8 || num_cross(0,dx-1, 0, 0,bp,cs) > 1 || num_cross(0,dx-1,dy-1,dy-1,bp,cs) > 1 ) { ad=98*ad/100; MSG(fprintf(stderr,"ad %d",ad);) }// ~bq // corrections for wrong recognized m1,m2 (all chars of same high) if (hchar && 2*y0m1+box1->m2) i=1; else i=0; if (gchar) ad=99*ad/100; bc='o'; // ToDo: need line information, if m1-m4 is not sure if (i){ bc='O'; } if ( bc=='O' && ad>99) ad=99; /* we can never 100% sure, 0O */ if (bc=='O' && hchar && dy<10 && ad>=99 && dx=4*dx) { Setac(box1,'0',99); Setac(box1,'O',98);Break; } // small top 0 if (bc=='O' && hchar && dy<10 && ad>=99 && dx>=dy-dy/16) { Setac(box1,'0',98); // but output as 'o' Setac(box1,'O',99);Break; } // round O sslmozFP /* 2017: upper width: 3/5 for 5x7o, 2/6 for 6x9zero but teletext!*/ if (bc=='o' && (!hchar) && y0<=box1->m2) { // 2018-09 bad m1-m4? 
DBG( c_ask='O'; ) Setac(box1,'O',99*ad/100); DBG( c_ask='0'; ) Setac(box1,'0',99*ad/100); } if (ad==100 && bc=='o' && box1->m2 && abs((box1->m2 - box1->y0) - (box1->y0 - box1->m1)) <= (box1->m2 - box1->m1)/4) ad=98*ad/100; // unsure 2010-10-01 DBG( c_ask=bc; ) Setac(box1,bc,ad); DBG( c_ask='0'; ) if (bc=='O') Setac(box1,'0',ad); if (bc=='o') Setac(box1,'0',98*ad/100); break; } return box1->c; } static wchar_t ocr0_pP(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i1,i2,i3,i4,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ wchar_t bc=UNKNOWN; // --- test pP --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='p'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if( get_bw(0 , dx/2,3*dy/4,3*dy/4,bp,cs,1) != 1 ) Break; if( get_bw(0 , dx/2, dy/2, dy/2,bp,cs,1) < 1 ) Break; if( get_bw(dx/4, dx-1, dy/4, dy/4,bp,cs,1) != 1 ) Break; i= loop(bp,dx-1,3*dy/4,dx,cs,0,LE); if (ip,cs) != 2 ) if( num_cross(x0+dx/2 ,x0+dx/2 , y0, y1-3*dy/16,box1->p,cs) != 2 ) if( num_cross(x0+dx/2+1,x0+dx/2+1, y0, y1-3*dy/16,box1->p,cs) != 2 ) Break; if( num_cross(0,dx-1,7*dy/8 ,7*dy/8 ,bp,cs) != 1 ) if( num_cross(0,dx-1,7*dy/8-1,7*dy/8-1,bp,cs) != 1 ) Break; if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2 ) if( num_cross(0,dx-1, dy/4-1, dy/4-1,bp,cs) != 3 ) // \it p with nice kurve if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2 ) if( num_cross(0,dx-1, dy/4+1, dy/4+1,bp,cs) != 2 ) Break; i= loop(bp,0,dy/2,dx,cs,0,RI); if(i<1) i++; if( num_cross(i-1,dx-1, dy/4 , dy/4 ,bp,cs) != 2 ) if( num_cross(i-1,dx-1, dy/4+1, dy/4+1,bp,cs) != 2 ) Break; i1= loop(bp, 0,3*dy/8,dx,cs,0,RI); if (i1>=dx/2) ad=90*ad/100; i2=i1+loop(bp,i1,3*dy/8,dx,cs,1,RI); // upper x-position of v line i3= loop(bp, 0,7*dy/8,dx,cs,0,RI); i4=i3+loop(bp,i3,7*dy/8,dx,cs,1,RI); // lower x-position of v line // 
out_x(box1);printf(" p:"); for ( y=dy/8; y<7*dy/8; y++ ){ x=i2+ (8*y-3*dy)*(i4-i2)/(4*dy); // right limit of line i= loop(bp,0,y,dx,cs,0,RI); if(i>x+dx/16) break; } if ( y<7*dy/8 ) Break; for ( x=0,j=y=dy/3; yx ) { x=i; j=y; } if(x>dx/2) break; } if ( x=dx) Break; if( get_bw(3*dx/4,dx-1, y , dy-1,bp,cs,1) == 1 ) Break; i=num_hole (x0,x1,y0,y1-dy/5,box1->p,cs,NULL); // j=num_hole (x0,x1,y0,y1 ,box1->p,cs,NULL); j=sdata->holes.num; if (j!=1 && dx< 8) ad=96*ad/100; if (j!=1 && dx>=8) ad=98*ad/100; if (i==0 && j==0) ad=90*ad/100; /* some times there is a small gap */ if (i>1 || j>1 || j>i) Break; // check for serif F i= loop(bp,bp->x-1, bp->y/4, dx ,cs,0,LE); i=i+loop(bp,bp->x-1-i,bp->y/4, dx ,cs,1,LE); j= loop(bp,bp->x-1-i,bp->y/4,3*dy/4,cs,0,DO); if (j>dy/2) ad=80*ad/100; // its an serif-F if( ((!hchar) && (!gchar)) || (hchar && gchar)) ad=95*ad/100; bc='p'; if( hchar && ((!gchar) || dy<14)) bc='P'; if ( hchar && gchar) ad=98*ad/100; // \ss sz if ((!hchar) && !gchar) ad=98*ad/100; Setac(box1,bc,ad); break; } return box1->c; } static wchar_t ocr0_qQ(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test Q --------------------------------------------------- for(ad=d=100;dx>2 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='Q'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if( get_bw(x0 ,x0+dx/3,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/3,x1 ,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2,x0+dx/2,y1-dy/3,y1, box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2,x0+dx/2,y0 ,y0+dy/4,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2,x0+dx/2,y0+dy/3,y1-dy/2,box1->p,cs,1) == 1 ) Break; if( get_bw(x1 ,x1 ,y0 ,y0 ,box1->p,cs,1) == 1 ) Break; //alpha if( num_cross(x0+dx/2,x0+dx/2,y0 , y1 ,box1->p,cs) < 2 ) Break; if( 
num_cross(x0+dx/5,x1-dx/5,y0 , y0 ,box1->p,cs) != 1 ) // AND if( num_cross(x0+dx/5,x1-dx/5,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break; if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; if( get_bw(x1 ,x1 ,y1-dy/8 , y1 ,box1->p,cs,1) == 0 ) if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; // i=num_hole(x0,x1,y0,y1,box1->p,cs,NULL); i=sdata->holes.num; if(!i) Break; if( i!=1 && (i!=2 || num_hole(x0,x1,y0+dy/2,y1,box1->p,cs,NULL)!=1) ) Break; x=x1;y=y1; turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST); if( xp,&x,&y,x0,x1,y0,y1,cs,ST,LE); if( x 5*dx/8 ) Break; // ~4 Okt00 x= loop(bp,dx-1,3*dy/8,dy,cs,0,LE); if( x>dx/4 ) Break; if( loop(bp,dx-1-x,0 ,dy,cs,0,DO) <= loop(bp,dx-2-x,0 ,dy,cs,0,DO) ) Break; // 4 if( loop(bp,dx-1,dy-2,dx,cs,0,LE) <= loop(bp,dx-1,dy/2,dx,cs,0,LE) ) if( loop(bp, 1,dy-1,dy,cs,0,UP) <= loop(bp,dx/2,dy-1,dy,cs,0,UP) ) if( loop(bp, 0,dy-2,dx,cs,0,RI)>dx/2 ) if( loop(bp, 0, 0,dx,cs,0,RI)>dx/2 ) Break; // 4 if( loop(bp,dx-1,3*dy/4,dx,cs,0,LE) + loop(bp, 0,3*dy/4,dx,cs,0,RI) < loop(bp,dx-1,2*dy/4,dx,cs,0,LE) + loop(bp, 0,2*dy/4,dx,cs,0,RI) ) ad=94*ad/100; // 4 if( loop(bp,0 ,3*dy/4,dx,cs,1,RI) >= dx ) ad=94*ad/100; // 4 if( loop(bp,dx-1,dy/3,dx,cs,0,LE)> dx/4 ) Break; j=loop(bp,dx/2,dy-1,dy,cs,0,UP); if (j>1 && j>dy/8) { if( get_bw(0,dx/2,dy-1-j/2,dy-1-j/2,bp,cs,1) == 1 ) { // ~RA if (j<5) ad=95*ad/100; else Break; } } // italic a for(i=0,y=0;y 2 ) i++; if(i>dy/8) Break; // ~a \it a if (i>0) ad=99*ad/100; // ~o look at the lower right side for falling line for(j=x=0,y=dy/2;yx){ x=i; } if (x-i>j) j=x-i; if( j>dx/16 ) Break; // falling line detected } if (j==0) Break; // no falling line => no Q if (j<=dx/16) ad=98*ad/100; if(y1<=box1->m3) ad=98*ad/100; // ~q no underlength! 
rare if(!hchar) ad=96*ad/100; Setac(box1,'Q',ad); break; } // --- test q --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='q'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ for ( y=y0; 2*y<=y0+y1; y++ ){ // detect ring if( num_cross(x0,x1, y, y,box1->p,cs) == 2 ) Break; } if (2*y>y0+y1) Break; /* < */ for ( y=(y0+y1)/2; y<=y1; y++ ){ // detect vert line if( num_cross(x0, x1, y, y,box1->p,cs) == 1 && num_cross(x0,x0+dx/2, y, y,box1->p,cs) == 0 ) Break; } if (y>y1) Break; /* O (y==y1 for 4x6font-q) */ for ( x=0,j=y=y0+dy/3; y<=y1-dy/8; y++ ){ // detect baseline i=loop(box1->p,x0,y,dx,cs,0,RI); if ( i>x ) { x=i; j=y; } if ( x>dx/2 ) break; } if ( x=dx) Break; if (y1-j+1p,cs) != 0 ) ad=96*ad/100; // ~g if( loop(box1->p,x0+dx/16,j,dy,cs,0,UP)<1+dy/16 ){ ad=97*ad/100; if (hchar || !gchar) Break; // 4 } if( loop(box1->p,x0+dx/16,j-dy/32-1,dy,cs,1,RI)>=dx-dx/8 || loop(box1->p,x0+dx/16,j-dy/16-1,dy,cs,1,RI)>=dx-dx/8 ){ ad=96*ad/100; // 4 } if( get_bw(x1-dx/3, x1, y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x0, x0+dx/3, y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x0, x0+dx/4, y1-dy/8, y1-dy/9,box1->p,cs,1) == 1 ) Break; if( get_bw(x0, x0+dx/4, y1-dy/5, y1-dy/9,box1->p,cs,1) == 1 ) ad=99*ad/100; if( num_cross(x0+dx/2,x0+dx/2, y0, j ,box1->p,cs) != 2 ) Break; // if( num_hole (x0 ,x1 , y0, y1 ,box1->p,cs,NULL) != 1 ) if (sdata->holes.num != 1) { if (dx<16) ad=98*ad/100; else Break; } if( num_hole (x0 ,x1 , y0, j ,box1->p,cs,NULL) != 1 ) { if (dx<16) ad=98*ad/100; else Break; } // ~\it g if( loop(bp,0,dy-1-dy/4,dx,cs,0,RI)>5*dx/8 && get_bw(dx/4,dx/4,dy-1-dy/4,dy-1,bp,cs,1)==1 ) Break; // ~\it g // what about unsure m1-m4? 
if(!gchar){ ad=ad*99/100; } // ~4 if( hchar){ ad=ad*99/100; } // ~49 Setac(box1,'q',ad); break; } return box1->c; } static wchar_t ocr0_iIjJ(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar, ax,ay,bx,by,cx,cy,ex,ey, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad,ya,yb,yc,yd,ye,yf,xa,xb, /* tmp-vars */ (*aa)[4]=sdata->aa; /* the for line ends, (x,y,dist^2,vector_idx) */ // --- test i --------------------------------------------------- // if(box1->dots==1) // what about \it neighbouring ij for(ad=d=100;dy>3 && dx>0;){ // min 3x4 without dot DBG( wchar_t c_ask='i'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (box1->num_frames > 2) Break; /* ~% , ToDo: allow double dot i? */ // ToDo: ':' check that high of dot is smaller than the vert. line! /* * o <== ya * o * * ooo <== yb * o * o * o * ooo */ ya=yb=y0; if (box1->dots!=1) ad=98*ad/100; if (sdata->holes.num>0) ad=96*ad/100; // ~ fat italic a gocr0801_bad5 while(dy>3*dx && box1->m2){ // test for vertical i without detected dot i= loop(bp,dx/2,dy-1 ,dy,cs,0,UP); if (dy-1-im3-2) break; i+=loop(bp,dx/2,dy-1-i,dy,cs,1,UP); // distance upper end to m2 > (m2-m1)/3 if (3*abs(dy-1-i-box1->m2)>box1->m2-box1->m1) break; if( get_bw(x0,x1,y0,(box1->m1+box1->m2)/2,box1->p,cs,1) == 1 ) if( get_bw(x0,x1,y1-i ,y1-i ,box1->p,cs,1) == 0 || get_bw(x0,x1,y1-i-1,y1-i-1,box1->p,cs,1) == 0 || get_bw(x0,x1,y1-i-2,y1-i-2,box1->p,cs,1) == 0 ) { Setac(box1,'i',ad); return 'i'; /* beleave me, thats an "i"! 
*/ } break; } // if( box1->dots!=1 ) Break; if( box1->m2 && 2*y0>=box1->m2+box1->m1 ) ya=box1->m1; // ya includes possible i-dot // failed on handwritten i m1=-22 frame2y=-31..-24 dy=15 // volume periphery(de:Umfang) num_vectors //# frame 0 ( +52, 32,14) 3*vol1<2*vol0 2*dy1num_frames>1) { // vector based i-dot check if (box1->frame_vol[1]>box1->frame_vol[0]/8) // no dust { if (3*box1->frame_vol[1]>2*box1->frame_vol[0]) Break; // to big if (3*box1->frame_per[1]>2*box1->frame_per[0]) Break; // not compact y=0; for (j=box1->num_frame_vectors[0]; jnum_frame_vectors[1] && j box1->frame_vector[j][1]) // find top ya = box1->frame_vector[j][1]; if ( y < box1->frame_vector[j][1]) // find bottom y = box1->frame_vector[j][1]; } if (box1->m2 && ya>box1->m2+2) Break; // dot starts below m2 if (2*y>=ya+y1) Break; // point ends to low } } else { Break; /* missing i-dot */ } // out_x(box1); #if 0 // dont work, because dots are removed as neighbours Aug10 /* pixel based i-dot check */ // search upper end of i-dot for (y=ya;3*yp,cs,1) == 1 ) break; if (3*y>=ya+2*y1) Break; // hmm, gap only, no dot? 
if (2*y>=ya+ y1) { ad=98*ad/100; MSG(fprintf(stderr,"wide gap");)} ya=y; if (box1->m2 && ya>box1->m2+2) Break; // dot starts below m2 for ( ;2*yp,cs,1) != 1 ) break; if (2*y>=ya+y1) Break; // point ends to low for ( ;2*yp,cs,1) == 1 ) break; // start y0 of base object yb=y; #endif MSG(fprintf(stderr," ya m1 yb %d %d %d", ya-box1->y0, box1->m1-y0, yb-box1->y0);) if (5*yb>=3*ya+2*y1) ad=99*ad/100; // large gap if (2*yb>= ya+ y1) ad=97*ad/100; // very large gap, ~: // if (5*yb>=2*ya+3*y1) Break; // huge gap, ~: // handwritten: ya=-31 m1=-22 yb=0 y1=15 // may be its better to trust the frame melting algorithm (ad=96%=ok) if (5*yb>=1*ya+4*y1) Break; // huge gap, ~: if (loop(bp,dx-1,yb+(y1-ya+1)/32,dx,cs,0,LE)>dx/2) // unusual (right part of ouml) ad=95*ad/100; // printf(" num_cross dy/2=%d %d\n",dy/2, num_cross(0,dx-1,dy/2,dy/2,bp,cs)); // printf(" dots=%d\n",box1->dots); out_x(box1); // \sl ~f. ! for (y=y1;y>ya;y--) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break; if (y>(ya+3*y1)/4) Break; if (y>(ya+2*y1)/3) ad=96*ad/100; y=(y1-yb+1)/2+yb-y0; /* only one vertical line, italic i is more an tall S */ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) Break; for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } yc=y; for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yd=y; if( yd<3*(y1-yb+1)/4+yb-y0 ) Break; y=(y1-yb+1)/2+yb-y0; for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } ye=y; for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yf=y; if( yf>(y1-yb+1)/4+yb-y0 ) Break; if(yd>yc+2){ xa=loop(bp, 0,yc-1,dx,cs,0,RI); xb=loop(bp,dx-1,yc-1,dx,cs,0,LE); if( xb-loop(bp,dx-1,yc,dx,cs,0,LE) /* Dec00 */ > xa-loop(bp, 0,yc,dx,cs,0,RI) ){ y= loop(bp,dx-xb,yc-1,dy,cs,0,DO); if(y>0){ i=loop(bp,dx-xb-1,yc-1+y-1,dy,cs,0,DO); if( i>0 ) y+=i-1; } if( yc-1+y < yd-1 ) Break; } else { y= loop(bp,11*xa/16,yc-1,dy,cs,0,DO); if( yc-1+y < yd-2 ) Break; } } if(yf0 ) y+=i-1; if( ye+1-y > yf+1 ) Break; } if( 2*y0 <= box1->m1+box1->m2 && loop(bp,0, 
0,dx,cs,0,RI)+1 < loop(bp,0,dx/2,dx,cs,0,RI) ) ad=97*ad/100; if( gchar ) // i is more often than j, be sure that realy correct Mai00 if( loop(bp, 0,2*dy/4,dx,cs,0,RI) -loop(bp,dx-1,2*dy/4,dx,cs,0,LE)>dx/8 ) Break; // could be a broken + or similar thing? if( 3 * ya > box1->m1 + 2*box1->m2 ) ad=90*ad/100; if( loop(bp,dx-1,3*dy/4,dx,cs,0,LE)>dx/2 && loop(bp,dx-1, dy-1,dx,cs,0,LE)5 && num_cross(x0+dx/2,x0+dx/2, ya, y1 ,box1->p,cs) >= 3 ) ad=95*ad/100; Setac(box1,'i',ad); break; } // --- test j --------------------------------------------------- // if(box1->dots==1) // what about \it neighbouring ij for(ad=d=100;dy>4 && dx>0;){ // min 3x4 DBG( wchar_t c_ask='j'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ // ToDo frames > 2 Break if (dx==1 && !gchar) Break; /* 2017-03 sure not a 'j', old=96% */ ya=y0; if( box1->m2 && 2*y0>=box1->m2+box1->m1 ) ya=box1->m1; for(y=ya;2*yp,cs,1) == 1 ) break; if(2*y>=ya+y1) Break; // hmm only gap ya=y; if( box1->m2 && ya>box1->m2+2 ) Break; for( ;2*yp,cs,1) != 1 ) break; if(2*y>=ya+y1) Break; // hmm no gap for( ;2*yp,cs,1) == 1 ) break; if(2*y>=ya+y1) Break; // hmm very large gap yb=y; if( loop(bp,dx-1,y+(y1-ya+1)/32,dx,cs,0,LE)>dx/2 ) Break; // unusual (right part of ouml) // printf(" num_cross dy/2=%d %d\n",dy/2, num_cross(0,dx-1,dy/2,dy/2,bp,cs)); // printf(" dots=%d\n",box1->dots); out_x(box1); // \sl ~f. ! 
for(y=(ya+y1)/2;y<=y1;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break; if(y<=y1) Break; y=(y1-yb+1)/2+yb-y0; /* only one vertical line, italic i is more an tall S */ if( num_cross(0,dx-1,y,y,bp,cs) >2 ) Break; for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } yc=y; for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yd=y; if( yd<3*(y1-yb+1)/4+yb-y0 ) Break; y=(y1-yb+1)/2+yb-y0; for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } ye=y; for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yf=y; if( yf>(y1-yb+1)/4+yb-y0 ) Break; if(yd>yc+2){ xa=loop(bp, 0,yc-1,dx,cs,0,RI); xb=loop(bp,dx-1,yc-1,dx,cs,0,LE); if( xb-loop(bp,dx-1,yc,dx,cs,0,LE) /* Dec00 */ > xa-loop(bp, 0,yc,dx,cs,0,RI) ){ y= loop(bp,dx-xb,yc-1,dy,cs,0,DO); if(y>0){ i=loop(bp,dx-xb-1,yc-1+y-1,dy,cs,0,DO); if( i>0 ) y+=i-1; } if( yc-1+y < yd-1 ) Break; } else { y= loop(bp,11*xa/16,yc-1,dy,cs,0,DO); if( yc-1+y < yd-2 ) Break; } } if(yf0 ) y+=i-1; if( ye+1-y > yf+1 ) Break; } if( 2*y0 <= box1->m1+box1->m2 && loop(bp,0, 0,dx,cs,0,RI)+1 < loop(bp,0,dx/2,dx,cs,0,RI) ) { ad=97*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } // if (loop(bp,0,dy-1,dx,cs,0,RI) // -loop(bp,0,dy-3,dx,cs,0,RI)>1+dx/16) // { ad=96*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } // ~c if( gchar ) // i is more often than j, be sure that realy correct Mai00 if( loop(bp, 0,2*dy/4,dx,cs,0,RI) -loop(bp,dx-1,2*dy/4,dx,cs,0,LE)<=dx/8 ) Break; // could be a broken + or similar thing? if( 3 * ya > box1->m1 + 2*box1->m2 ) ad=80*ad/100; if (!gchar) ad=96*ad/100; if( box1->dots!=1 ) ad=98*ad/100; Setac(box1,'j',ad); break; } // --- test I --------------------------------------------------- for(ad=d=100;dy>4 && dy>dx && 5*dy>4*(box1->m3-box1->m2);){ // min 3x4 DBG( wchar_t c_ask='I'; ) if( box1->dots==1 ) Break; if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ x =loop(bp,0, dy/2,dx,cs,0,RI); // konvex? 
divided Q if(loop(bp,0,7*dy/8,dx,cs,0,RI) > x+dx/8) Break; for( y=dy/16;y1+dx/8 ) break; } if( y<3*dy/4 ) Break; // out_x(box1); // upper max width for(i2=i1=0,y=0;yi1){ i1=x;i2=y; } } for(i4=i3=0,y=3*dy/4;yi3){ i3=x;i4=y; } } if( abs(i3-i1)>1+dx/8 ) Break; // if i3>>i5 more sure! if( i1>i5 ){ // look for edges else *80% } if(i1+1i2 ) i2=i; // printf(" get_line(%d,%d) %d\n",i1,i2, // get_line2(i1,dy/8,i2,dy-1-dy/8,bp,cs,100)); if( get_line2(i1,dy/8,i2,dy-1-dy/8,bp,cs,100)<95 ) Break; x =(i1-i2+4)/8; i1+=x; i2-=x; // upper and lower width (what about serifs?) y=dy/8; x =loop(bp,i1, y+0,dx,cs,1,LE); i=x; x =loop(bp,i1, y+1,dx,cs,1,LE); if(x>i)i=x; x =loop(bp,i1, y+0,dx,cs,1,RI); j=x; x =loop(bp,i1, y+1,dx,cs,1,RI); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break; x =loop(bp,i2,dy-y-1,dx,cs,1,LE); j=x; x =loop(bp,i2,dy-y-2,dx,cs,1,LE); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break; x =loop(bp,i2,dy-y-1,dx,cs,1,RI); j=x; x =loop(bp,i2,dy-y-2,dx,cs,1,RI); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break; if(dy>15) // v024a4 if( loop(bp,dx-1,dy/16 ,dx,cs,0,LE) > loop(bp,dx-1,dy/4 ,dx,cs,0,LE)+1+dx/32 ) Break; // ~bad ) (thinn) for(i=0,y=(dy+7)/16;y<(15*dy+7)/16 && i<2;y++) if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) i++; if( i>1+(dx+8)/32 ) Break; // rnd80-Droid-Sans-Mono I if(!hchar){ // right part (bow) of h is never a l if( get_bw(dx/4,dx/4, 0,dy/4,bp,cs,1) == 1 && get_bw(dx/4,dx/4,dy/2,dy-1,bp,cs,1) == 0 ) Break; if( loop(bp, 0,dy/4,dx,cs,0,RI)> dx/4 && loop(bp,dx-1,dy/4,dx,cs,0,LE)<=dx/4 && loop(bp, 1, 0,dy,cs,0,DO)<=dy/4 ) Break; // ~z } if( get_bw(x1,x1,y0 ,y1 ,box1->p,cs,2) != 2 && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2 && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2 && get_bw(x0,x0,y0+1,y1-1,box1->p,cs,1) != 1 ) Break; /* ~] */ if ( loop(bp,dx-1, dy/2,dx,cs,0,LE) <= dx/8 && loop(bp, 0, dy/2,dx,cs,0,RI) > dx/2 ) Break; /* ~] 2010-10 */ if ( loop(bp,dx-1, dy/2,dx,cs,0,LE) > dx/2 && loop(bp, 0, dy/2,dx,cs,0,RI) <= dx/8 ) Break; /* ~[ 2010-10 */ if ( loop(bp,dx-1, dy/4,dx,cs,0,LE) > dx/2 && 
loop(bp,dx-1,3*dy/4,dx,cs,0,LE) > dx/2 && loop(bp, 0, dy/2,dx,cs,0,RI) < dx/4 ) Break; /* ~[ */ x =loop(bp, 0,dy/2,dx,cs,0,RI); // konvex/konkav? ~() i =loop(bp,dx-1,dy/2,dx,cs,0,LE); if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8 && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8 && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) < i-dx/8 && loop(bp,dx-1, dy/8,dx,cs,0,LE) < i-dx/8 ) Break; // ~( if( loop(bp, 0,7*dy/8,dx,cs,0,RI) < x-dx/8 && loop(bp, 0, dy/8,dx,cs,0,RI) < x-dx/8 && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) > i+dx/8 && loop(bp,dx-1, dy/8,dx,cs,0,LE) > i+dx/8 ) Break; // ~) if( loop(bp, 0, dy/8,dx,cs,0,RI) -(dx-loop(bp,dx-1,7*dy/8,dx,cs,0,LE)) > dx/4 ) Break; // ~/ if( loop(bp, 0, 0,dx,cs,0,RI) > dx/2 // ToDo: check for serifs && loop(bp, 0, dy/8,dx,cs,0,RI) > dx/2 && loop(bp,dx-1,dy-1 ,dx,cs,0,LE) > dx/2 && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) > dx/2 ) ad=99*ad/100; // ~/ if (box1->m2 && 3*y0>box1->m1+2*box1->m2) if( get_bw(x0+dx/8,x1-dx/8,box1->m1,(box1->m1+box1->m2)/2,box1->p,cs,1) == 1 ) Break; // ~i if(i1+1p,cs,1) != 1 || get_bw(x0+i4/4,x0+i4/4,y1-dy/4,y1,box1->p,cs,1) != 1 ) { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } // ToDo: improve it if(!hchar){ ad=96*ad/100; MSG({}) } // ~bad_small_r if (box1->m4 && y1m4) { // probably lower dot? if ((dx>2 && get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) == 1) || (dx<3 && get_bw(x0 ,x1 ,y1+1,box1->m4,box1->p,cs,1) == 1)) { ad=96*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } } // ~! 
// a---b // I // I // c---e // check against Z for(bx=0,ax=dx,ay=by=y=0;ybx) { bx=dx-1-i; by=y; } i+=loop(bp,dx-1-i,y,dx,cs,1,LE); if (dx-i-1dy-1-dy/4;y--){ i =loop(bp,0,y,dx,cs,0,RI); if (iex) { ex=i; ey=y; } } x=(3*ax+cx)/4; y=(3*ay+cy)/4; i= loop(bp,x,y,dx,cs,0,RI); MSG(fprintf(stderr,"xy= %3d %3d i= %3d ad=%d",x,y,i,ad);) x=(3*bx+ex)/4; y=(3*by+ey)/4; j= loop(bp,x,y,dx,cs,0,LE); if (j>1 && (2*i>3*j || 3*i<2*j )) ad=99*ad/100; // 2010-10-10 invalid2010 if (j>1 && ( i>2*j || 2*i< j )) ad=97*ad/100; // j>0 to j>1 MSG(fprintf(stderr,"xy= %3d %3d j= %3d ad=%d",x,y,j,ad);) i=loop(bp,0,0,dy,cs,0,DO); if (i>dy/8 && idx/4) {ad=96*ad/100;MSG({})} // ~l 5x7 if (loop(bp,dx-1,0,dx,cs,0,LE) // tmp08/0811qemu1 -loop(bp, 0,0,dx,cs,0,RI)==dx/4) ad=98*ad/100; // ~l 4x10 2010-09 // JS-2010-09 do we have a full square (bad font) we can heve I or l // I and l should have 98% and 99% after context correction (JS2010-09) if (ad>98 && get_bw(x0,x1,y0,y1,box1->p,cs,2) == 0) { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } if (gchar) ad=98*ad/100; // J if (box1->m3 && 2*y1<=box1->m2+box1->m3) {ad=96*ad/100;MSG({})} // ' // 2010-10-01 sample tmp10/barcode_code128_145 if (dx<3 && dy>10 && box1->m4==0) { ad=ad*95/100;MSG({})} // just a vertical line? 
Setac(box1,'I',ad); break; } // --- test J --------------------------------------------------- 22Nov06 for(ad=d=100;dy>4 && dy>=dx && dx>2;){ // min 3x4 ~Y)]d', // rewritten for vectors 0.42 int ld, i1, i2, i3, i4, i5, i6, i7; // line derivation + corners DBG( wchar_t c_ask='J'; ) if (sdata->holes.num > 0) Break; /* no hole */ /* half distance to the center */ d=2*sq(128/4); /* now we check for the upper right end of the J */ if (aa[3][2]>d) Break; /* [2] = distance */ /* searching for 4 notches between neighbouring ends */ /* type A B 6OOOO 6O5 7O5 7O O O O O 2O 1O4 1O4 OO 2OO 3 3 */ /* Warning: aa0 can be left upper or left lower point for type B */ /* get a point on the inner low left side of the J */ i =nearest_frame_vector(box1,aa[3][3],aa[1][3],(x0+x1)/2,y0); /* failed for slanted J before Jun09 */ i1=nearest_frame_vector(box1,i ,aa[1][3], x1+dx/8,y1-dy/8); /* get the most left point on the lower part of the J */ i2=nearest_frame_vector(box1,i1,aa[3][3], x0-2*dx, y1-dy/8); /* get a point on the middle of the bottom of the J */ i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], (x0+x1)/2, y1); /* get a point on the outer low right side of the J */ i4=nearest_frame_vector(box1,aa[1][3],aa[3][3], x1, (y0+2*y1)/3); /* get a point on the outer right side below top serif */ i5=nearest_frame_vector(box1,aa[2][3],aa[3][3], (x0+2*x1)/3,y0); /* get a point on the left side of upper serif */ i6=nearest_frame_vector(box1,aa[3][3],i1, x0, y0); /* get a point on the most right left side of upper serif */ i7=nearest_frame_vector(box1,i6,i1, x1, y0); MSG(fprintf(stderr," i1-i7 %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7);) /* check the most left point on middle left area 3 vs. 
J 2013-06 */ i =nearest_frame_vector(box1,i7,i1,x0,y0+dy/2); if (box1->frame_vector[i ][0]< box1->frame_vector[i1][0]-dx/8-1) Break; // 3 /* check the highest point on lower left area */ i =nearest_frame_vector(box1,i1,i3,x0,y0); if (box1->frame_vector[i ][1]-y0frame_vector[i ][1]-y0<=dy/2) ad=97*ad/100; // imperfect a /* check the lowest point on upper left area, serife? */ j =nearest_frame_vector(box1,i6,i7,x0,y1); if (box1->frame_vector[i ][1] -box1->frame_vector[j ][1]<=dy/4) Break; // imperfect a if (box1->frame_vector[i7][1]>y0+dy/4) Break; // not to low if (box1->frame_vector[i1][1] -box1->frame_vector[i7][1]frame_vector[i4][1] -box1->frame_vector[i5][1]frame_vector[i7][0]frame_vector[i1][0] -box1->frame_vector[i2][0]<=dx/8) Break; // ~1 if (box1->frame_vector[i1][0] -box1->frame_vector[i2][0]<=dx/4) ad=ad*99/100; // ~1 if (box1->frame_vector[i6][1]>y0+dy/8) ad=99*ad/100; // ~1 if (aa[0][2]==0) { // ]? ad=99*ad/100; if (aa[1][2]==0) ad=98*ad/100; if (aa[2][2]<=aa[3][2]) ad=97*ad/100; } /* check for left bow */ for (j=i=i2;i!=i4;i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[ i][0] /* [0]=x */ frame_vector[i1][0]) break; /* curve? 
*/ } if (i==i4) Break; // ~I /* check for no right bow */ for (j=i=i2;i!=i4;i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[ i][0] /* [0]=x */ >box1->frame_vector[i4][0]) break; } if (i!=i4) Break; // ~I /* check for no right bow */ for (j=i=i5;i!=i6;i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[ i][1] > y0+dy/4) break; } if (i!=i6) Break; // ~Y /* check if upper left and lower left points are joined directly */ ld=line_deviation(box1, i7, i1); MSG(fprintf(stderr," i7,i1 %d %d linedist= %d/%d",i7,i1,ld,2*sq(1024/4));) if (ld >2*sq(1024/4)) Break; if (5*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3 if (6*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3 if (7*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3 if (8*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3 /* check if lower right and upper right points are joined directly */ ld=line_deviation(box1, i4, i5); MSG(fprintf(stderr," i4,i5 %d %d linedist= %d/%d",i4,i5,ld,2*sq(1024/4));) if (ld >2*sq(1024/4)) Break; if (5*ld >4*2*sq(1024/4)) ad=99*ad/100; // J exists as gchar and ~gchar if (!hchar){ ad=99*ad/100; } if (box1->num_frames>1) { ad=98*ad/100; // j } Setac(box1,'J',ad); break; } return box1->c; } static wchar_t ocr0_brackets(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i1,i2,i3,i4,i5,i6,hchar=sdata->hchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ (*aa)[4]=sdata->aa, /* corner-points, (x,y,dist^2,vector_idx) */ ad,r1,r2; /* tmp-vars */ wchar_t bc=UNKNOWN; // --- test > derived from xX --------------------------------------------------- // rewritten for vectors v0.41 for(ad=d=100;dx>1 && dy>2;){ // min 3x2 // 0 - indizes 0,1,i1,i2 pointing to edges of the char // \ . // \ . 
// i1,i2 // / // / // 1 DBG( wchar_t c_ask='>'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (sdata->holes.num > 0 && (dx<6 || dy<6)) Break; /* # */ /* calculate the half distance to the center */ d=2*sq(128/4); /* now we check for the 2 left ends of the > */ if (aa[0][2]>d) Break; /* upper left end */ if (aa[1][2]>d) Break; /* lower left end */ if (aa[1][1]-aa[0][1]num_frame_vectors[0]) { if (box1->frame_vector[i ][0] >=box1->frame_vector[i1][0]) i1=i; /* notice most right vector */ } if (i1==i || i1==aa[0][3]) Break; /* calculate the distance to the center */ x=box1->frame_vector[i1][0]; y=box1->frame_vector[i1][1]; if (2*x-aa[0][0]-aa[1][0](dy+2)) Break; if ( aa[0][0]+aa[1][0]-2*x>=0) Break; d=line_deviation(box1, aa[0][3], i1); // 2017-03: fixed >sq(1024/4); /* check if upper left and center point are joined directly */ MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));) if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024); MSG(fprintf(stderr,"ad=%d", ad);) j=nearest_frame_vector(box1,i1,aa[1][3],x0-2*dx,y1-dy/8-1); d=line_deviation(box1, i1, j /*aa[1][3] */); // 2010-10 ocr-b /* check if lower left and center point are joined directly */ MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));) if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024); MSG(fprintf(stderr,"ad=%d", ad);) /* run along right side from bottom to top */ for (j=i2=i=aa[1][3];i!=aa[0][3];i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[i ][0] >=box1->frame_vector[i2][0]) i2=i; /* notice most right vector */ // MSG(fprintf(stderr,"search right: %d %d %d %d",i,i2,aa[1][3],aa[0][3]);) } if (i2==i || i2==aa[1][3]) Break; /* calculate the distance to the center */ x=box1->frame_vector[i2][0]; y=box1->frame_vector[i2][1]; if ( (aa[0][0]+aa[1][0]-2*x)>= 0 ) Break; if (abs(aa[0][1]+aa[1][1]-2*y)>(dy+2)/4) Break; if (aa[0][0]>=x || aa[1][0]>=x) Break; j=nearest_frame_vector(box1,i2,aa[0][3],x0+dx/4,y0-2*dy); if (box1->frame_vector[j][0]-x0>dx/4) 
Break; d=line_deviation(box1, i2, j /* aa[0][3] */); /* check if upper left and center point are directly joined directly */ MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));) if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024); MSG(fprintf(stderr,"ad=%d", ad);) j=nearest_frame_vector(box1,aa[1][3],i2,x0+dx/4,y1+2*dy); if (box1->frame_vector[j][0]-x0>dx/4) Break; i=nearest_frame_vector(box1,aa[1][3],i2,x1+2*dx,y0+dy/2+dy/8); if (box1->frame_vector[i][1]-y0>dy/2+dy/8) Break; // ~() d=line_deviation(box1, j /* aa[1][3] */, i /* i2 */); // 2010-10-08 /* check if lower left and center point are directly joined */ MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));) if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024); MSG(fprintf(stderr,"ad=%d", ad);) /* ToDo: calculate momentums or max derivations along lines to distinguish )]}> i1,i2 */ if (sdata->gchar) ad=98*ad/100; if (sdata->hchar) ad=99*ad/100; bc='>'; Setac(box1,bc,ad); break; } // --- test /\\ ------------------------------------------------ // if(bc==UNKNOWN) // if(!box1->dots) for(ad=d=100;dx>3 && dy>3;){ // min 4x4 for 4x6 font DBG( wchar_t c_ask='/'; ) if (sdata->holes.num > 0) Break; /* tolerant against a tiny hole */ #if 1 for(i=y=0;y2 || (i>0 && dy<16)) Break; #endif /* get the center as exact as possible */ i2=dx-1-loop(bp,dx-1,dy/2 ,dx,cs,0,LE) // be exact for small fonts +dx-1-loop(bp,dx-1,dy/2+dy%2-1,dx,cs,0,LE) + loop(bp, 0,dy/2 ,dx,cs,0,RI) + loop(bp, 0,dy/2+dy%2-1,dx,cs,0,RI); if (abs(i2-2*dx)>1+dx/2) Break; // ??? 
if (abs(i2-2*dx)> dx/2) ad=99*ad/100; i1=loop(bp,dx-1,dy/16,dx,cs,0,LE); // top right end i3=loop(bp,dx-1,dy-1 ,dx,cs,0,LE); // bottom right end i4=loop(bp, 0,0 ,dx,cs,0,RI); // top left end i6=loop(bp, 0,dy-1 ,dx,cs,0,RI); // bottom left end // i= (box1->m4 + box1->m3)/2 - box1->m2; // out_x(box1);printf("() %d %d %d %d %d %d %d\n",i,i1,i2,i3,i4,i5,i6); // ~lI for(i=i4,y=0;ydx/6+1 ) break; i=x; } if( ydx/6+1 ) break; i=x; } if( ym2 && sdata->gchar) ad=99*ad/100; // 2010-10 if (box1->m2 && !sdata->hchar) ad=98*ad/100; if (box1->m2 && dy < box1->m3 - box1->m2 -1) ad=96*ad/100; // ~, // check top-right-end, bottom-left-end if (i1<=dx/8 && i6<=dx/8 && i4-(dx-i3)>=dx/8 ) { // / if (i4<=dx/8 && i3<=dx/8) Break; if (i4-(dx-i3)=dx/8 ) { // \ ... if (i6<=dx/8 && i1<=dx/8) Break; if (i6-(dx-i1) ------------------------------------------------ // if(bc==UNKNOWN) // if(!box1->dots) for(ad=d=100;dx>1 && dy>4;){ // min 3x4 '(' ')' DBG( wchar_t c_ask='('; ) if (sdata->holes.num > 1) {Break;}; /* tolerant against a tiny hole */ if (aa[0][1]>y0+dy/8 && aa[3][1]>y0+dy/8) Break; // no upper end? if (aa[1][1]=y1-dy/8 && aa[2][0]>=x1-dx/8 // ( vs. l (handwritten) && aa[3][1]<=y0+dy/8 && aa[3][0]<=x1-dx/4) ad=99*ad/100; if (aa[2][1]>=y1-dy/8 && aa[2][0]>=x1-dx/8 // ( vs. 
l (handwritten) && aa[3][1]<=y0+dy/8 && aa[3][0]<=x1-dx/2) Break; #if 1 for(i=y=0;y2 || (i>0 && dy<16)) {Break;}; #endif /* look for the extrema => r1..r2 */ for(i=dx,r1=r2=y=dy/2-dy/8;y<=dy/2+dy/8;y++){ j=loop(bp, 0,y,dx,cs,0,RI); if(j==i) r2=y; if(jdy){ // from Aug06 vector-version of greater is used // if(i2==0 && 3*i5>dx && i4<=dx/8 && i6<=dx/8) { Setac(box1,(bc='>'),98);{Break;}; } if(i5==0 && 3*i2>dx && i1<=dx/8 && i3<=dx/8) { Setac(box1,(bc='<'),98);{Break;}; } } if( dx > 2 && 9*dx>=5*dy ){ // 4x6 screen-font (3*5) ad=98; if (dx<8) ad=99*ad/100; if (dx<6) ad=96*ad/100; if( 2*dx > OCR_JOB->res.avX && 4*dx>dy ) ad=98; // printf(" %d %d %d %d %d %d\n",i5,i1,i3,i2,i4,i6); if( i5==0 && i1<=dx/8+1 && i3<=dx/8+1 && i1+i3<=dx/8+1 && i2>=dx/2 && i4>=3*dx/4 && i6>=3*dx/4 ) { if (2*loop(bp, 0, y/2,dx,cs,0,RI)+1+dx/16=dx/2 && i1>=3*dx/4 && i3>=3*dx/4 ) { if (2*loop(bp,dx-1, y/2,dx,cs,0,LE)+1+dx/16m4+box1->m3)/2-box1->m2; // // out_x(box1);printf("() %d %d %d %d %d %d %d\n",i,i1,i2,i3,i4,i5,i6); if(2*i2i4+i6 && 2*dx=i){ Setac(box1,(bc=')'),98);break; } if(2*i2>i1+i3 && 2*i5=i){ if(2*i2<=i1+i3+1 || 2*i5>=i4+i6-1) ad=98*ad/100; if(2*i2<=i1+i3+2 || 2*i5>=i4+i6-2) ad=98*ad/100; for(x=y=0;yx ) x=i; } for(y=0;y<(dy+2)/4;y++){ i=loop(bp,0,y+dy/8,dx,cs,0,RI);if( i=dx/8+1) ad=99*ad/100; // ~{ Jul09 if (loop(bp,0,dy/2-dy/8,dx,cs,0,RI)-i5>=dx/8+1) ad=99*ad/100; // ~{ Jul09 Setac(box1,(bc='('),ad); break; } Break; } // '(' ')' // --------- test [] -------------------------------- for(ad=d=100;dx>2 && dy>4 && dy>=2*dx;){ // (3,6) on 4x6 font DBG( wchar_t c_ask=']'; ) if (sdata->holes.num > 1) { Break;} /* tolerant against a tiny hole */ if (box1->num_frames!=1) break; if (box1->num_frame_vectors[0]!=10) ad=98; // not charp, unsure if (!hchar) ad=97*ad/100; for(y=0;yp,cs,2) == 2 && get_bw(x0,x1,y0+1,y0+1,box1->p,cs,2) == 2 ) {Break;}; if( get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) == 2 && get_bw(x0,x1,y1-1,y1-1,box1->p,cs,2) == 2 ) {Break;}; if( get_bw(x0 ,x0,y0 ,y1 ,box1->p,cs,2) == 0 || 
get_bw(x0+1 ,x0+1,y0 ,y1 ,box1->p,cs,2) == 0 ) if( get_bw(x0+dx/2,x1,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 ) { Setac(box1,(bc='['),ad);break; } if( get_bw(x1 ,x1,y0 ,y1 ,box1->p,cs,2) == 0 || get_bw(x1-1 ,x1-1,y0 ,y1 ,box1->p,cs,2) == 0 ) if( get_bw(x0,x1-dx/2,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 ) { Setac(box1,(bc=']'),ad);break; } break; } #if CODE_NOT_COMPLETED // --- test ] ------- for(ad=d=100;dx>2 && dy>3;){ DBG( wchar_t c_ask=']'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (sdata->holes.num > 0) ad=98*ad/100; /* # */ /* 1/8 distance to the center */ d=2*sq(128/16); /* now we check for the 4 ends of the x */ if (aa[0][2]>d) Break; if (aa[1][2]>d) Break; if (aa[2][2]>d) Break; if (aa[3][2]>d) Break; if (aa[3][0]-aa[0][0]<7*dx/8) Break; if (aa[2][0]-aa[1][0]<7*dx/8) Break; if (aa[1][1]-aa[0][1]<7*dy/8) Break; if (aa[2][1]-aa[3][1]<7*dy/8) Break; if (aa[3][0]-aa[0][0]<2) Break; /* to small */ if (aa[2][0]-aa[1][0]<2) Break; /* to small */ MSG( fprintf(stderr," aa %d %d %d %d %d %d %d %d d %d %d %d %d",\ aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0,\ aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0,\ aa[0][2],aa[1][2],aa[2][2],aa[3][2]);) /* left and right vertical line */ d=line_deviation(box1, aa[0][3], aa[1][3]); if (d>2*sq(1024/4)) Break; ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100; d=line_deviation(box1, aa[2][3], aa[3][3]); if (d>2*sq(1024/4)) Break; /* search uppermost left ^ */ i1=nearest_frame_vector(box1,aa[1][3],aa[2][3], x0, y0); x=box1->frame_vector[i1][0]; y=box1->frame_vector[i1][1]; if (y-y0 > 5*dy/8) Break; if (x-x0 > 5*dx/8) Break; /* search uppermost right ^ ~H */ i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y0); if ( box1->frame_vector[i3][0]-x> dx/4 && box1->frame_vector[i3][1]-y<=dy/8) Break; /* check if upper left and lower right point are joined directly */ dbg[0]=d=line_deviation(box1,i1, aa[2][3]); if (d >2*sq(1024/4)) Break; /* check if lower left and lower left point are joined directly */ 
dbg[1]=d=line_deviation(box1, aa[1][3],i1); if (d >2*sq(1024/4)) Break; if (!hchar) ad=99*ad/100; if ( gchar) ad=98*ad/100; // \sc N ac=(wchar_t) ']'; Setac(box1,ac,ad); if (ad>=100) return ac; break; } #endif // --------- test ocr-a-[] -------------------------------- if(bc==UNKNOWN) for(ad=d=98;dx>5 && dy>7 && 2*dy>3*dx;){ // only for accurate font at the moment DBG( wchar_t c_ask='['; ) if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */ if (!hchar) ad=97*ad/100; if( num_cross(0,dx-1, 0, 0,bp,cs) != 1 ) break; if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1 ) break; if ( loop(bp,dx-1,dy/2,dx,cs,0,LE) +loop(bp, 0,dy/2,dx,cs,0,RI) <= dx/4 ) break; // O for(y=dy/8;yp,cs,1) == 0) { Setac(box1,(bc='['),ad);break; } if( get_bw(x0,(5*x0+3*x1)/8,y0+3*dy/16,y1-3*dy/16,box1->p,cs,1) == 0) { Setac(box1,(bc=']'),ad);break; } break; } // --------- test {} -------------------------------- for(ad=d=99;dx>2 && dy>5 && 2*dy>3*dx;){ DBG( wchar_t c_ask='{'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (!hchar) ad=97*ad/100; for(y=0;y3*dx/4 ) {ad=99*ad/100;MSG({})} if ( loop(bp,0, 0,dx,cs,0,RI)>3*dx/4 ) {ad=99*ad/100;MSG({})} // < if ( loop(bp,0, 0,dy,cs,0,DO)=8) if ( loop(bp,dx-1, 0,dx,cs,0,LE) + loop(bp,dx-1,dy/4,dx,cs,0,LE) - 2*loop(bp,dx-1,dy/8,dx,cs,0,LE) >=dx/8 ) {ad=98*ad/100;MSG({})} // < if ( loop(bp,dx-2,dy-1,dy,cs,0,UP)>dy/4 ) Break; // f if ( get_bw(x0,x0,y0,y0+dy/4,box1->p,cs,1) == 1 || get_bw(x0,x0,y1-dy/4,y1,box1->p,cs,1) == 1 ) Break; Setac(box1,(bc='{'),ad);break; } // --------- test {} -------------------------------- for(ad=d=99;dx>2 && dy>5 && 2*dy>3*dx;){ DBG( wchar_t c_ask='}'; ) if (!hchar) ad=97*ad/100; for(y=0;y3*dx/4 ) {ad=99*ad/100;MSG({})} if ( loop(bp,dx-1, 0,dx,cs,0,LE)>3*dx/4 ) {ad=99*ad/100;MSG({})} // > if ( loop(bp,dx-1, 0,dy,cs,0,DO)=8) if ( loop(bp,0, 0,dx,cs,0,RI) + loop(bp,0,dy/4,dx,cs,0,RI) - 2*loop(bp,0,dy/8,dx,cs,0,RI) >=dx/8 ) {ad=98*ad/100;MSG({})} // < if ( loop(bp,1,dy-1,dy,cs,0,UP)>dy/4 ) Break; // 
??? if ( get_bw(x1,x1,y0,y0+dy/4,box1->p,cs,1) == 1 || get_bw(x1,x1,y1-dy/4,y1,box1->p,cs,1) == 1 ) Break; Setac(box1,(bc='}'),ad);break; } return box1->c; } #if 0 /* ---------- empty prototype function for copy and expand ---------- */ static wchar_t ocr0_XXX(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i0,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ac,ad; /* tmp-vars */ // --- test XXX --------------------------------------------------- return box1->c; } #endif /* ----------------------- part9 -------------------------------- */ static wchar_t ocr0p9(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1; int dx=x1-x0+1,dy=y1-y0+1, /* size */ i1,i2,i3,i4; /* tmp-vars */ int xa,xb, /* used for store significant points of char */ dbg[9]={0,0,0,0,0,0,0,0,0}, /* debugging space */ ya,yb,ad,cs=sdata->cs; wchar_t ac,bc=UNKNOWN; // bestletter int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */ int hchar; // char is higher than e int gchar; // char has ink lower than m3 // --- hchar --- gchar ------------------------- hchar=0;if( 2*y0<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1; gchar=0;if( 2*y1>=2*box1->m3+(box1->m4-box1->m3) ) gchar=1; // if the char is slightly moved down correction can be done if ( y0m2 && y1>box1->m3 && 2*y1m3+box1->m4) // moved if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1; /* reserved for the future */ // --- test beta,\3,sz,"s --------------------------------------------- if(bc==UNKNOWN && hchar) for(ad=d=100;dx>3 && dy>6;){ // min 4x7 DBG( wchar_t c_ask='S'; ) if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */ /* this part is provisorium, should be changed! 
a-\ | d b| / | \ -c / */ if( num_cross(x0 ,x1 ,y0+dy/4 ,y0+dy/4 ,box1->p,cs) != 2 && num_cross(x0 ,x1 ,y0+dy/4+1,y0+dy/4+1,box1->p,cs) != 2 ) break; for(i=1+dy/16,y=y0+dy/8;y0;y++){ if( yp,cs) != 2 ) i--;} else { if( num_cross(x0 ,x1 ,y,y,box1->p,cs) < 2 ) i--;} if( get_bw(x0,x0+dx/2,y,y,box1->p,cs,1) == 0 ) i--; if( yp,cs,1) == 0 ) i--; } if( i<=0 ) break; // out_x(box1); for(y=y0+dy/3;yp,x1,y,dx,cs,0,LE); if( i>=dx/8 ) break; i+=loop(box1->p,x1-i,y,dx,cs,1,LE); if( i>=dx/2 ) break; } if( y>=y1-dy/3 ) break; for(y=y0+dy/5;yp,cs,1) == 1 ) break; if( y>=y0+dy/3 ) break; for(y=y0+dy/2;yp,cs,1) == 1 ) break; if( y>=y1 ) break; for(y=y1-dy/3;yp,x1,y,dx,cs,0,LE); if( i>dx/4 && get_bw(x1-dx/8,x1-dx/8,y,y1,box1->p,cs,1) == 1 ) break; } if( ym3==0 || 2*y1m3+box1->m4 ) if( loop(box1->p,x1,y1, dx,cs,0,LE)==0 && loop(box1->p,x1,y1-dy/4,dx,cs,0,LE)>dx/8 ) break; // ~R for(x=x0+dx/4;xp,cs) == 3 ) break; if( x>=x1-dx/4 ) break; i=loop(bp,dx/2,dy-1,dy,cs,0,UP)+dy/64; // Jul00 for(x=dx/5;x i ) break; if( x==dx/2 ) break; x=x0+loop(bp,0,dy/4,dx,cs,0,RI); for(;xp,cs,1) == 0 ) break; if( xp,cs,NULL) != 0 ) break; if (sdata->holes.num != 0) break; bc=LATIN_SMALL_LETTER_SHARP_S; Setac(box1,(wchar_t)bc,98); break; } // --- test + ------------------------------------------------ for(ad=d=100;dx>2 && dy>2;){ // min 3x3 DBG( wchar_t c_ask='+'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ xa=(dx+1)/3-1; ya=(dy+1)/3-1; /* size of the 4 gaps = 1/3 * 1/3 */ xb=(dx+1)/4; yb=(dy+2)/4; /* smaller gap */ if( get_bw(x0,x0+xa,y0,y0+ya,box1->p,cs,1) == 1 ) Break; // left upper if( get_bw(x0,x0+xa,y1-ya,y1,box1->p,cs,1) == 1 ) Break; // left lower if( get_bw(x1-xb,x1,y0,y0+ya,box1->p,cs,1) == 1 ) Break; // right upper if( get_bw(x1-xa,x1,y1-ya,y1,box1->p,cs,1) == 1 ) { // right lower if( get_bw(x1-xa,x1,y1-yb,y1,box1->p,cs,1) == 1 ) Break; ad=99*ad/100; // smoothed inner corner? 
0907 } for(i=0,y=y0+ya;y<=y1-ya;y++){ // horizontal line if( get_bw(x0+dx/9,x1-dx/9,y,y,box1->p,cs,2) == 0 ) { i=y; break; } } if (3*dx<2*dy) ad=99*ad/100; // ~t if( !i ) Break; ac=(wchar_t) '+'; Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test $ (vert. line crossing) ------------------------------ for(ad=d=99;dx>3 && dy>6;){ // min 4x7 2010-10 DBG( wchar_t c_ask='$'; ) if (sdata->holes.num != 2) Break; if( get_bw(x0,x0+dx/5,y0 ,y0+dy/18,box1->p,cs,1) == 1 ) Break; if( get_bw(x0,x0+dx/9,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break; if( get_bw(x1-dx/9,x1,y0 ,y0+dy/18,box1->p,cs,1) == 1 ) Break; if( get_bw(x1-dx/5,x1,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break; if( get_bw(x0,x0+dx/3,y0+dy/3 ,y0+dy/2 ,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/3,x1,y1-dy/2 ,y1-dy/3 ,box1->p,cs,1) != 1 ) Break; i1=x0+loop(box1->p,x0,y0,dx,cs,0,RI); if( i1x1-dx/5 ) Break; i2=x0+loop(box1->p,x0,y1,dx,cs,0,RI); if( i2i1 ) Break; ad= get_line2(i1,y0,i2,y1,box1->p,cs,100)*ad/100; // check upper left and lower right half circle, $ (Oct08: removed) // Oct08 JS: check the position of holes (better for tiny fonts) // upper hole must be the first!? 
(hole[].x0 = relative coordinates) if ( sdata->holes.hole[0].y0 < sdata->holes.hole[1].y0 ) i4=0; else i4=1; /* sort to [idx^i4] */ if ( sdata->holes.hole[0^i4].y1 > sdata->holes.hole[1^i4].y0 ) Break; /* no y-overlap allowed */ // upper left hole // MSG( fprintf(stderr,"hole[0]=x0=%d %d", sdata->holes.hole[0^i4].x0, sdata->holes.hole[0^i4].x1); ) if ( sdata->holes.hole[0^i4].x0 > (dx+1)/3 ) Break; if ( sdata->holes.hole[0^i4].x1 > dx/2+dx/4 ) Break; if ( sdata->holes.hole[0^i4].y1 >= dy/2+dy/8 ) Break; if ( sdata->holes.hole[0^i4].y0 > dy/2-dy/8 ) Break; // lower right hole // fprintf(stderr,"\nDBG hole[1]=x0=%d %d", sdata->holes.hole[1^i4].x0, sdata->holes.hole[1^i4].x1); if ( sdata->holes.hole[1^i4].x0 <= dx/2-dx/4 ) Break; if ( sdata->holes.hole[1^i4].x1 < dx/2+dx/4 ) Break; if ( sdata->holes.hole[1^i4].y1 < dy/2+dy/8 ) Break; if ( sdata->holes.hole[1^i4].y0 <= dy/2-dy/8 ) Break; if (ad<95) Break; ac=(wchar_t) '$'; Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test $ (no vert. line crossing, but lines on top and bottom) ----- for(ad=d=99;dx>3 && dy>6;){ // min 4x7 2010-10 DBG( wchar_t c_ask='$'; ) if (box1->num_frames != 1) break; /* 5x7 ..@..<- @@@@@< @.... 
@@@@@ ....@ @@@@@ ..@..<- */ if( get_bw(x0,x0+dx/9,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break; if( get_bw(x1-dx/9,x1,y0 ,y0+dy/18,box1->p,cs,1) == 1 ) Break; if( get_bw(x1-dx/5,x1,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break; if( get_bw(x0,x0+dx/3,y0+dy/3 ,y0+dy/2 ,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/3,x1,y1-dy/2 ,y1-dy/3 ,box1->p,cs,1) != 1 ) Break; i1=x0+loop(box1->p,x0,y0,dx,cs,0,RI); if( i1x1-dx/5 ) Break; i2=x0+loop(box1->p,x0,y1,dx,cs,0,RI); if( i2i1 ) Break; i1=nearest_frame_vector(box1, aa[0][3], aa[1][3], x1+2*dx, (y0+y1)/2); i2=nearest_frame_vector(box1, aa[2][3], aa[3][3], x0-2*dx, (y0+y1)/2); if (box1->frame_vector[i1][0]-x0<3*dx/4 || box1->frame_vector[i2][0]-x0> dx/4 || box1->frame_vector[i1][1]-y0 frame_vector[i2][1]-y0) Break; if (ad<95) Break; ac=(wchar_t) '$'; Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test & ------------------------------------------------ for(ad=d=99;dx>3 && dy>4;){ /* 4x6 font */ DBG( wchar_t c_ask='&'; ) if (sdata->holes.num != 2) Break; if( get_bw(x1-dx/9,x1,y0,y0+dy/4,box1->p,cs,1) == 1 ) Break; // g if( loop(bp,dx/2,0,dy,cs,0,DO)>dy/2) Break; i1=loop(bp,0,dy/8 ,dx,cs,0,RI); if (i1>dx/2) Break; i =loop(bp,0,dy/4 ,dx,cs,0,RI); if (i1>dx/2) Break; if (idx/2) Break; i =loop(bp,0,dy-dy/4-1,dx,cs,0,RI); if (i3>dx/2) Break; if (ii1) Break; for( i2=0, y=dy/4; y<=dy/2+1; y++ ){ i =loop(bp,0,y,dx,cs,0,RI); if( i>i2 ) i2=i; } if(2*i2-i1-i3<1) Break; // if( num_hole(x0,x1 ,y0,y1,box1->p,cs,NULL)!=2 ) Break; if( num_hole(x0,x1-dx/4,y0,y1,box1->p,cs,NULL)!=2 ) Break; if( num_cross(dx-1,dx-1,dy/4,dy-1,bp,cs) < 1 ) Break; for( x=dx-1; x>=dx/2; x-- ){ if( num_cross(x,x,dy/4,dy-1,bp,cs) > 1 ) break; } if( x<=3*dx/4 && x 3 ) { // glued ah if (dy>15) { Break; } else ad=96*ad/100; } if (!hchar) ad=98*ad/100; bc=(wchar_t) '&'; Setac(box1,bc,ad); if (ad>=100) return bc; break; } // --- test \it & like \epsilon\tau ------------------------------ if(bc==UNKNOWN) for(ad=d=100;dx>7 && dy>7;){ DBG( wchar_t c_ask='&'; ) if 
(sdata->holes.num > 2) break; /* tolerant against a tiny hole */ if( num_cross(0,dx-1, dy/4, dy/4,bp,cs) != 3 ) break; if( num_cross(0,dx-1, dy/2, dy/2,bp,cs) != 4 ) break; if( num_cross(dx/2,dx-1,dy/2, dy/2,bp,cs) != 2 ) break; if( num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs) != 2 ) break; if( num_cross(0,dx-1, dy-1, dy-1,bp,cs) != 1 ) break; if( num_cross( 0, 0,0,dy-1,bp,cs) != 1 ) break; if( num_cross( dx/3, dx/3,0,dy-1,bp,cs) != 4 ) break; if( num_cross(13*dx/16,13*dx/16,0,dy/8,bp,cs) != 0 ) break; if( num_cross(4*dx/8,4*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break; if( num_cross(3*dx/8,3*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break; if( num_cross(5*dx/8,5*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break; if( num_hole(x0 ,(x0+x1)/2,y0, y1,box1->p,cs,NULL) != 1 ) break; if( num_hole(x0+dx/8,x1-dx/4,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) break; ac=(wchar_t) '&'; Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test ? --------------------------------------------------- for(ad=d=98;dx>2 && dy>5;){ // min 3x(4+2) DBG( wchar_t c_ask='?'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if ( num_cross(x0, x1, y0, y0, box1->p, cs) !=1 ) Break; // ~? if ( num_cross(x0, x1, y1, y1, box1->p, cs) > 1 ) Break; // ~? for(y=y0;yp,cs,1) != 1 ) break; // lower end if (2*ym4) { // probably lower dot not catched in box? if (get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) != 1 ) Break; i1=box1->m4; for(;i1>y1;i1--) // new y1 if( get_bw(x0, x1,i1,i1,box1->p,cs,1) == 1 ) break; // lower dot } y--; i=y-y0+1; // new dy for (y=0;yp, cs) == 2 ) break; if( y>=dy/4 && loop(bp, 0,y,dx,cs,0,RI) // 2018-09 -loop(bp,dx-1,y,dx,cs,0,LE)>dx/2 && loop(bp, 0,dy/8,dx,cs,0,RI)7 || dy>13 || 13*dx<7*dy)) Break; // bad fix // ToDo better add bow-detection against fail ?! 
// if( num_hole( x0, x1, y0, y1, box1->p,cs,NULL) > 0 ) Break; if (sdata->holes.num > 0) Break; for(y=y0+dy/2;y<=i1;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 0 ) break; if( y==i1 ) Break; for( ;y<=i1;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break; if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+7*dx/8,x1,y,i1,box1->p,cs,1) == 1 ) Break; // broken thin 2 bc='?'; Setac(box1,(wchar_t)bc,98); return bc; } // --- test !| --------------------------------------------------- for(ad=d=99; dy>4 && dy>2*dx;){ // min 3x4 DBG( wchar_t c_ask='!'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ // measure thickness if (num_cross(x0,x1,y0 ,y0 ,box1->p,cs)!=1) Break; if (num_cross(x0,x1,y0+dy/2,y0+dy/2,box1->p,cs)!=1) Break; for(y=y0;yp,cs,1) != 1 ) break; // lower end if (2*ybox1->m3-dy/8) ad=ad*97/100; /* missing dot? */ i1=y1; if (y==y1 && box1->m4) { // probably lower dot not catched in box? if ((dx>2 && get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) == 1) || (dx<3 && get_bw(x0 ,x1 ,y1+1,box1->m4,box1->p,cs,1) == 1 )) { i1=box1->m4; for(;i1>y1;i1--) // new y1 if( get_bw(x0, x1,i1,i1,box1->p,cs,1) == 1 ) break; // lower dot } } i2=i1; for( i1=0,y=y0;y<=i2;y++){ i=num_cross(x0,x1,y,y,box1->p,cs); if(i>1) break; if(i==0 && i1==0) i1=y; } if(y<=i2 || i1==0 || i1dx/4+1 ) Break; // f if (!hchar) ad=96*ad/100; Setac(box1,(wchar_t)'!',ad); break; } // --- test * five egdes (jagges? beames?) what is the right english word? 
---- for(ad=d=99;dx>2 && dy>4;){ DBG( wchar_t c_ask='*'; ) if (sdata->holes.num > 0) Break; /* tolerant against a tiny hole */ if( num_cross(0,dx-1, 0,dy-1,bp,cs) != 1 && num_cross(0,dx-1, 1,dy-2,bp,cs) != 1 ) Break; if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 2 && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 2 ) Break; x=dx/2;y=(6*dy+8)/16; // center point 6/8=6/2^3 rounded /* upwarts from center */ dbg[0]=i=get_line2(x,y,x ,0,bp,cs,100); if(i<95) Break; if (dx<8) /* be exact on small fonts, where get_line2 returns 100 (ToDo change) */ if (get_bw(x,x,0,y,bp,cs,2)==2) Break; /* horizontal */ dbg[1]=i=get_line2(0,y,dx-1,y,bp,cs,100); if(i<95) Break; if (dy<8) if (get_bw(0,dx-1,y ,y ,bp,cs,2)==2 && get_bw(0,dx-1,y+1,y+1,bp,cs,2)==2) Break; /* down (right) */ i=get_line2(x,y,(5*dx+4)/8,dy-1,bp,cs,100); j=get_line2(x,y,(6*dx+4)/8,dy-1,bp,cs,100); if(j>i) dbg[2]=i=j; if(i<95) Break; /* down (left) */ dbg[3]=i=get_line2(x, y,(2*dx+4)/8,dy-1,bp,cs,100); if(i<95) Break; // straight up /* check for lower gap at bottom */ dbg[4]=i=get_bw( x, x,dy-1-dy/8,dy-1,bp,cs,1); if(i==1) Break; dbg[5]=i=get_line2( dx/4,dy/4, 0,0,bp,cs,101); if(i<95) Break; // upper left gap dbg[6]=i=get_line2(dx-1-dx/4,dy/4,dx-1,0,bp,cs,101); if(i<95) Break; // upper right gap MSG(fprintf(stderr,"%d %d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5],dbg[6]);) Setac(box1,(wchar_t)'*',ad); break; } // --- test * six egdes (jagges? beames?) incl. vert. 
line (|+X) ---- for(ad=d=100;dx>4 && dy>4;){ DBG( wchar_t c_ask='*'; ) if (sdata->holes.num > 0) Break; /* tolerant against a tiny hole */ if( num_cross(0,dx-1, dy/8, dy/8,bp,cs) != 3 && num_cross(0,dx-1, 1+dy/8, 1+dy/8,bp,cs) != 3) Break; if( num_cross(0,dx-1,dy-2-dy/8,dy-2-dy/8,bp,cs) != 3) Break; if( num_cross(0 , 0, 0,dy-1,bp,cs) != 2) Break; if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) != 2) Break; if( num_cross(0,dx-1,dy/2,dy/2,bp,cs) != 1) Break; if( num_cross( 0 ,dx/8,dy/2,dy/2,bp,cs) != 0) Break; if( num_cross(dx-1-dx/8,dx-1,dy/2,dy/2,bp,cs) != 0) Break; if (dx>5) { dbg[0]=i=get_line2(0,dy-2-dy/8,dx-1,dy/8,bp,cs,100); if(i<95) Break; // black upwarts beam dbg[1]=i=get_line2(0,dy/8,dx-1,dy-2-dy/8,bp,cs,100); if(i<95) Break; // black downwards beam /* check vertical line */ dbg[2]=i=get_line2(dx/2,0,dx/2, dy-1,bp,cs,100); if(i<95) Break; } MSG(fprintf(stderr,"%d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5]);) Setac(box1,(wchar_t)'*',99); break; } // --- test * six egdes '*' = 'x'+'-' incl. 
horizontal line ---- for(ad=d=100;dx>3 && dy>4;){ DBG( wchar_t c_ask='*'; ) if (sdata->holes.num > 0) Break; /* tolerant against a tiny hole */ if( num_cross( dx/8, dx/8, 0, dy-1,bp,cs) != 3 && num_cross(1+dx/8,1+dx/8, 0, dy-1,bp,cs) != 3) Break; if( num_cross(dx-1-dx/8,dx-1-dx/8,0,dy-1,bp,cs) != 3 && num_cross(dx-2-dx/8,dx-2-dx/8,0,dy-1,bp,cs) != 3) Break; if( num_cross( 0,dx-1, 0, 0,bp,cs) != 2) Break; if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) != 2) Break; if( num_cross(dx/2,dx/2,0,dy-1,bp,cs) != 1) Break; // check upper and lower gap if( num_cross(dx/2,dx/2,0,dy/8,bp,cs) != 0) Break; if( num_cross(dx/2,dx/2,dy-1-dy/8,dy-1,bp,cs) != 0) Break; if (dx>5) { dbg[0]=i=get_line2(dx-2-dx/8,0,dx/8,dy-1,bp,cs,100); if(i<95) Break; // black upwarts beam dbg[1]=i=get_line2(dx/8,0,dx-2-dx/8,dy-1,bp,cs,100); if(i<95) Break; // black downwards beam /* check horizontal line */ dbg[2]=i=get_line2(0,dy/2,dx-1,dy/2,bp,cs,100); if(i<95) Break; } MSG(fprintf(stderr,"%d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5]);) Setac(box1,(wchar_t)'*',98); break; } // --- test @ - a popular char should be detectable! added in version v0.2.4a5 if(bc==UNKNOWN) for(ad=d=99;dx>4 && dy>7;){ // 2010-09-24 5x8 sample DBG( wchar_t c_ask='@'; ) if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */ // check ~ 7x10 0 with dot in it // num_holes==1 + hole.y0<=dy/8,>=y1-dy/8 if (sdata->holes.num==1 && sdata->holes.hole[0].y0<=y0+dy/8 && sdata->holes.hole[0].y1>=y1-dy/8) Break; if (loop(bp, 0,dy/2,dx,cs,0,RI)>dx/4) Break; if (loop(bp,dx-1,dy/2,dx,cs,0,LE)>dx/4) Break; if (loop(bp,dx/2,dy-1,dy,cs,0,UP)>dx/8) Break; if (loop(bp,dx/2, 0,dy,cs,0,DO)>dx/8) Break; /* ..@@@@..<- 8x10 example .@@..@@. .@@@@@. - 7x9 sample @@....@@ @@...@@ .@@@.<- 5x8 sample @@..@@@@< @@...@@< @...@ @@.@@.@@ @@.@@@@ @..@@< @@.@@.@@ @@.@@@@ @.@.@ @@..@@@. @@.@@@@ @.@.@ @@...... @@.@@@. @.@@. .@@...@@ @@..... @.... ..@@@@@.<- .@@@@@.<- .@@@@<- */ x=(7*dx+3)/16; // 8x10=3 7x9=3 5x8=2 (within vert. 
middle line) y=dy/2; i=num_cross(0,dx-1,y,y,bp,cs); j=loop(bp, 0,y,dx,cs,0,RI); // measure line width, 2010-09-24 j=loop(bp, j,y,dx,cs,1,RI); if (dx>=4*j && (i<3 || i>4)) Break; if (dx>=5*j && i!=4) ad=98*ad/100; i=num_cross(x,x,0,dy-1,bp,cs); if (i<2) Break; if (i!=4) { j=num_cross(x+1,x+1,0,dy-1,bp,cs); if (abs(4-j)4) Break; if (i!=4) ad=97*ad/100; if( num_cross(0, x,y,y,bp,cs) != 2 ) Break; if( num_cross(x,dx-1,y,y,bp,cs) != 2 && dx>=4*j) Break; if( num_cross(x,x,0, y,bp,cs) != 2 ) Break; if( num_cross(x,x,y,dy-1,bp,cs) != 2 ) Break; if (dx>7) { // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 1 ) Break; if (sdata->holes.num != 1) Break; if( num_hole(x0+dx/8,x1-3*dx/16,y0+dy/8,y1-dy/8,box1->p,cs,NULL) != 1 ) Break; } Setac(box1,(wchar_t)'@',ad); break; } // --- test paragraph v0.2.6 if(bc==UNKNOWN && hchar) for(ad=d=100;dx>4 && dy>15;){ DBG( wchar_t c_ask='$'; ) if (sdata->holes.num > 3) break; /* tolerant against a tiny hole */ if( get_bw( 0,dx/2,3*dy/4,3*dy/4,bp,cs,1) == 1 ) break; if( get_bw(3*dx/4,dx-1,3*dy/4,3*dy/4,bp,cs,1) == 0 ) break; if( get_bw( 0,dx/4, dy/4, dy/4,bp,cs,1) == 0 ) break; if( get_bw( dx/2,dx-1, dy/4, dy/4,bp,cs,1) == 1 ) break; if( get_bw(dx/2,dx/2, 0, dy/4,bp,cs,1) == 0 ) break; if( get_bw(dx/2,dx/2,dy-1-dy/4, dy-1,bp,cs,1) == 0 ) break; if( num_cross(dx/2,dx/2,0,dy-1,bp,cs) != 4 ) break; if( num_cross(x0,x1,y0+dy/2,y0+dy/2,box1->p,cs) != 2 ) break; if( num_hole( x0,x1,y0+dy/4,y1-dy/4,box1->p,cs,NULL) != 1 ) break; Setac(box1,SECTION_SIGN,96); break; // paragraph=0xA7=167 } return bc; } /* ----------------------- partx -------------------------------- */ /* ocr0px: run the "special character" tests on one box. Input: sdata, the shared per-box data; this function reads sdata->box1, sdata->bp, sdata->cs, sdata->aa and sdata->holes. hchar/gchar are recomputed locally from box1->m1..m4. Each test is a for-block used as a breakable scope: it is left via break/Break as soon as a check fails; a test that passes registers its candidate with Setac(box1,char,ad), where ad is the weight the test started with, scaled down by soft checks (presumably a certainty in percent - confirm against Setac). Tests visible here, in order: ff ligature, ae, AE, o/O with stroke, cent sign, euro sign, c/C with cedilla, '#', bullet, '|', 'i' (gray gap), broken pipe '|', '%', capital Omega. Returns ac directly when a test ends with ad>=100; otherwise returns bc, which in the visible code is only assigned by the Omega test and is UNKNOWN otherwise. */ static wchar_t ocr0px(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1; int dx=x1-x0+1,dy=y1-y0+1, /* size */ i1,i2,i3,i4,j1,cs=sdata->cs; /* tmp-vars */ int ya,ad; /* used for store significant points of char */ int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */ wchar_t ac,bc=UNKNOWN; 
// bestletter int hchar; // char is higher than e int gchar; // char has ink lower than m3 // --- hchar --- gchar ------------------------- hchar=0;if( 2*y0<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1; gchar=0;if( 2*y1>=2*box1->m3+(box1->m4-box1->m3) ) gchar=1; // if the char is slightly moved down correction can be done if ( y0m2 && y1>box1->m3 && 2*y1m3+box1->m4) // moved if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1; /* reserved for special chars, to test at the end */ // --- test 'ff' --------------------------------------------------- // ToDo: better check and call test 'f' and 'f' with subboxes if( bc==UNKNOWN ) for(ad=98;dx>4 && dy>6;){ // Dec00 body copied from H DBG( wchar_t c_ask='f'; ) if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */ if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2 && num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) != 2 ) break; if( num_cross(0,dx-1,3*dy/4 ,3*dy/4 ,bp,cs) != 2 && num_cross(0,dx-1,3*dy/4+1,3*dy/4+1,bp,cs) != 2 ) break; if( loop(bp,0 ,dy/8,dx,cs,0,RI) + loop(bp,dx-1,dy/8,dx,cs,0,LE)>dx/2 ) break; // ~A for( j1=0,i=1,y=y0+dy/10; yp,x0 ,y,dx,cs,0,RI) +loop(box1->p,x1 ,y,dx,cs,0,LE); if( j>10*dx/16 ) i=0; if ( j>j1 ) j1=j; } if( !i ) break; for( x=dx/4; x 3*dy/8 ) break; if ( 10*y > dy ){ /* italic */ i=loop(bp,x ,dy-y,dx,cs,0,RI); if( i>1 && y+loop(bp,x+i-1,dy-y,dy,cs,0,UP)>3*dy/8 ) break; } } if( x>=dx/2 ) break; x=loop(box1->p,x0 ,y1-dy/8,dx,cs,0,RI) +loop(box1->p,x1 ,y1-dy/8,dx,cs,0,LE); for( i=1,y=dy/4; ydx/5 ) i=0; } if( !i ) break; // ~K Jul00 for( i=0,ya=y=y0+dy/4; yp,x0 ,y,dx,cs,0,RI); j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ) { i=j; ya=y; } } if( i<=dx/2 ) break; ya-=y0; if( num_cross(0,dx-1,ya ,ya ,bp,cs) != 1 && num_cross(0,dx-1,ya+1,ya+1,bp,cs) != 1 ) break; /* Dec00 */ for( y=ya; y 2 && num_cross(0,dx-1,y+1,y+1,bp,cs) > 2 ) break; if ( yp,cs,1) == 0 ) i=0; } if( !i ) break; for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){ if( get_bw( x, x,y1-dy/4,y1 ,box1->p,cs,1) == 0 ) i=0; } if( i ) 
break; for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){ if( num_cross(x,x,y0+dy/8,y1-dy/8, box1->p,cs) == 1 ) i=0; } if( i ) break; for(i=1,y=y0;y<=y0+dy/4 && i;y++){ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; } if( i ) break; for(i=1,y=y1-dy/4;y<=y1 && i;y++){ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; } if( i ) break; if( num_cross(x0 ,x0+dx/8 ,y0+dy/8 ,y0 ,box1->p,cs) != 0 ) ad=96*ad/100; if( get_bw(x1-dx/8, x1 , y0, y0+dy/8,box1->p,cs,1) != 1 ) break; if( get_bw(x0 , x0+dx/8, y1-dy/8, y1,box1->p,cs,1) != 1 ) break; i1=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(i1>dx/2) break; i2=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(i2i1+dx/8) break; i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if(i3i2+dx/8) break; if(abs(i1+i3-2*i2)>dx/16+1) break; if( num_hole(x0,x1,y0+dy/4,y1,box1->p,cs,NULL) != 0 ) break; if (!hchar) ad=96*ad/100; if (!gchar) ad=99*ad/100; ac=LATIN_SMALL_LIGATURE_FF; Setac(box1,ac,ad); break; } // --- test ae --------------------------------------------------- if( bc==UNKNOWN ) for(ad=98;dx>4 && dy>6;){ // provisional DBG( wchar_t c_ask=LATIN_SMALL_LETTER_AE; ) if (sdata->holes.num > 4) Break; /* tolerant against a tiny hole */ if( num_cross( dx/4,dx-1,3*dy/16,3*dy/16,bp,cs) != 2 && num_cross(dx-1-dx/4,dx-1,3*dy/16,3*dy/16,bp,cs) != 1 ) Break; if( num_cross(0,dx-1,3*dy/ 4,3*dy/ 4,bp,cs) < 2 ) Break; if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 3 ) Break; if( num_cross(dx-1,0, 0, dy-1,bp,cs) < 3 ) Break; if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) < 2 ) if( num_cross(0,dx-1,1+dy/16,1+dy/16,bp,cs) < 2 ) Break; if( num_cross(0,dx-1,dy-1-dy/16,dy-1-dy/16,bp,cs) < 2 ) Break; for( x=0,i2=y=dy/4; y<3*dy/4; y++ ){ j=loop(bp,0,y,dx,cs,0,RI); if(j>x) { i2=y; x=j; } } if( x3*dx/4 ) Break; for( x=0,i4=y=dy/4; y<3*dy/4; y++ ){ j=loop(bp,dx-1,y,dx,cs,0,LE); if(j>x) { i4=y; x=j; } } if( x3*dx/4 ) Break; for( x=0,i4=y=dy/8; y<3*dy/4; y++ ){ j=loop(bp,dx-1 ,y,dx,cs,0,LE); j=loop(bp,dx-1-j,y,dx,cs,1,LE); if(j>x) { i4=y; x=j; } } if( xp,cs,NULL) != 1 ) Break; if( 
num_hole(x0+dx/2-1,x1,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break; ac=LATIN_SMALL_LETTER_AE; Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test AE --------------------------------------------------- if( bc==UNKNOWN ) for(ad=98;dx>5 && dy>6;){ // provisional DBG( wchar_t c_ask=LATIN_CAPITAL_LETTER_AE; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if( num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) < 2 ) Break; if( num_cross(0,dx-1,3*dy/ 4,3*dy/ 4,bp,cs) < 2 ) Break; if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 3 ) Break; if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) != 1 && num_cross(0,dx-1, dy/32, dy/32,bp,cs) != 1 && num_cross(0,dx-1, 0, 0,bp,cs) != 1 ) Break; // check for upper horizontal line j=loop(bp,dx-1 ,0,dx,cs,0,LE); x=j; j=loop(bp,dx-1-j,0,dx,cs,1,LE); i=loop(bp,dx-1 ,1,dx,cs,0,LE); if (ij) j=i; if (x>dx/8) Break; if (jx) break; x=j; j=loop(bp, j,y,dx,cs,1,RI); if(j>i1) { i1=j; i2=y; } j=loop(bp,dx-1 ,y,dx,cs,0,LE); j=loop(bp,dx-1-j,y,dx,cs,1,LE); if(j>i3) { i3=j; i4=y; } } if( y<3*dy/4 || i1i1) { i1=j; } j=loop(bp,dx-1 ,dy-1-y,dx,cs,0,LE); j=loop(bp,dx-1-j,dy-1-y,dx,cs,1,LE); if(j>i3) { i3=j; } } if( i1<=dx/4 || i3<=dx/4 ) Break; for( x=dx-1-dx/8; x>dx/2; x-- ){ // look for right the E if( num_cross(x,x, 0,dy-1,bp,cs) == 3 ) if( num_cross(x,x, 0,dy/4,bp,cs) == 1 ) if( num_cross(x-1,dx-1-dx/8,3*dy/4,3*dy/4,bp,cs) == 0 ) if( num_cross(x,x,3*dy/4,dy-1,bp,cs) == 1 ) break; } if (x<=dx/2) Break; // not found if (sdata->holes.num != 1) Break; if( num_hole(x0,x0+3*dx/4,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break; // if( num_hole(x0, x1,y0,y1 ,box1->p,cs,NULL) != 1 ) Break; ac=LATIN_CAPITAL_LETTER_AE; Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test /0 /o /O O_WITH_STROKE ----------------------------------------- for(ad=99;dx>4 && dy>4;){ // provisional DBG( wchar_t c_ask=LATIN_SMALL_LETTER_O_WITH_STROKE; ) if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */ if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 3 ) Break; if( 
num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 3 ) Break; if (loop(bp,dx-1,3*dy/8,dx,cs,0,RI)>dx/8) Break; /* NOTE(review): starts at the right edge dx-1 but scans in direction RI; LE may have been intended - confirm against loop() */ if (loop(bp, 0,5*dy/8,dx,cs,0,RI)>dx/8) Break; if( num_cross( 0,dx-1, 0, 0,bp,cs) > 2 ) Break; if( num_cross(dx/4,dx-1, 0, 0,bp,cs) > 2 ) Break; if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) > 2 ) Break; if( num_cross( 0,3*dx/4,dy-1,dy-1,bp,cs) > 2 ) Break; if( num_cross( 0, 0, 0,dy-1,bp,cs) > 2 ) Break; if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) > 2 ) Break; if( num_cross( 0, 0,dy/4,dy-1,bp,cs) > 2 ) Break; if( num_cross(dx-1,dx-1, 0,3*dy/4,bp,cs) > 2 ) Break; i1 =loop(bp,dx-1 , 0,dx,cs,0,LE); if( i1>dx/8 ) Break; i1+=loop(bp,dx-1-i1, 0,dx,cs,1,LE); if( i1>dx/3 ) Break; i1=dx-1-i1; i2 =loop(bp, 0,dy-1,dx,cs,0,RI); if( i2>dx/8 ) Break; for(y=1;y3*dx/16 ) break; } if( yholes.num != 2) Break; // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 2 ) Break; if ( hchar && 2*y0m1+box1->m2 ) ac=LATIN_CAPITAL_LETTER_O_WITH_STROKE; else ac=LATIN_SMALL_LETTER_O_WITH_STROKE; Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test /c /C C_WITH_STROKE CENT_SIGN -------------------------- // here only the version with a continuously vertical line (not broken variant) if( bc==UNKNOWN ) for(ad=98;dx>4 && dy>4;){ // provisional DBG( wchar_t c_ask=CENT_SIGN; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 2 ) Break; if( num_cross(0,dx-1-dx/4,dy/2,dy/2,bp,cs) != 2 ) Break; if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 3 ) Break; if( num_cross( 0,dx-1, 0, 0,bp,cs) > 2 ) Break; if( num_cross(dx/4,dx-1, 0, 0,bp,cs) > 2 ) Break; if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) > 2 ) Break; if( num_cross( 0,3*dx/4,dy-1,dy-1,bp,cs) > 2 ) Break; if( num_cross( 0, 0, 0,dy-1,bp,cs) > 2 ) Break; if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) > 3 ) Break; if( num_cross( 0, 0,dy/4,dy-1,bp,cs) > 2 ) Break; if( num_cross(dx-1,dx-1, 0,3*dy/4,bp,cs) > 3 ) Break; i1 =loop(bp,dx-1 , 0,dx,cs,0,LE); if( i1>dx/4 ) Break; i1+=loop(bp,dx-1-i1, 0,dx,cs,1,LE); if( i1>dx/4 ) 
Break; i1=dx-1-i1; i2 =loop(bp, 0,dy-1,dx,cs,0,RI); if( i2>dx/4 ) Break; for(y=0;ydx/16+1) x-=dx/16+1; j=loop(bp,x,y,dx,cs,0,RI); // fprintf(stderr,"\n x=%d j=%d",x,j); if( j>(dx+4)/8 ) ad=96*ad/100; if( j>(dx+2)/4 ) break; } if( yp,cs,NULL) != 1 ) Break; if (sdata->holes.num != 1) Break; ac=CENT_SIGN; Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test EURO_CURRENCY_SIGN ----------------------------------------- if( bc==UNKNOWN ) for(ad=98;dx>4 && dy>6;){ // provisional DBG( wchar_t c_ask='&'; ) if (sdata->holes.num > 1) break; /* tolerant against a tiny hole */ if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 4 ) break; if( num_cross( 0,dx-1, 0, 0,bp,cs) != 1 ) break; if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) != 1 ) break; if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 1 ) break; for(i=0,y=dy/4;ydx/4 ) break; j=loop(bp,x,y,dx,cs,1,RI); if( j>i ) i=j; } if( ydx/2 ) break; } if( y>=dy-dy/4-1 ) break; // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break; if (sdata->holes.num != 0) break; ac=EURO_CURRENCY_SIGN; Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test LETTER_C_WITH_CEDILLA --------------------------------------------------- if (bc==UNKNOWN) if (gchar) for(ad=98;dx>3 && dy>6;){ // provisional DBG( wchar_t c_ask='c'; ) if (sdata->holes.num > 0) break; /* no tolerant against tiny holes */ j=loop(bp,dx-1,dy/16 ,dy,cs,0,LE); x=loop(bp,dx-1,dy/16+1,dy,cs,0,LE); if (xdx) Break; // ~4 ocr-b if( num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) > 2 ) break; if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 2 ) break; if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) > 2 ) break; for( x=dx,i2=y=dy/4; y<3*dy/4; y++ ){ j=loop(bp,0,y,dx,cs,0,RI); if(j0 ) break; i1=x; for( x=0,i4=y=dy/4; y<5*dy/8; y++ ){ j=loop(bp,dx-1,y,dx,cs,0,LE); if(j>x) { i4=y; x=j; } } if( xdy/4) break; j =loop(bp,dx/2,j,dy,cs,0,DO); if(j3*dx) break; j =loop(bp,dx-1-j/2,dy-1-dy/8,dy,cs,0,UP); if(j>dy/2) break; // ~() // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break; if (sdata->holes.num) break; if( 
hchar ) ac= LATIN_CAPITAL_LETTER_C_WITH_CEDILLA; else ac= LATIN_SMALL_LETTER_C_WITH_CEDILLA; Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test # --------------------------------------------------- for(ad=99;dx>4 && dy>4;){ // never sure? DBG( wchar_t c_ask='#'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if (sdata->holes.num < 1) Break; if( num_cross(0,dx-1, dy/8, dy/8,bp,cs) != 2 ) Break; if( num_cross(0,dx-1,dy-1-dy/8,dy-1-dy/8,bp,cs) != 2 ) Break; if( num_cross(0,dx-1, dy/2, dy/2,bp,cs) != 2 ) Break; if( num_cross(0,dx/2, dy/2, dy/2,bp,cs) != 1 ) Break; /* fat "#" have only small ends on left and right side, we tolerate this */ j=loop(bp, 0,dy/8,dx,cs,0,RI); if(j<1 || j=dx/2) Break; if (j=dx/2) Break; if (j3*dx/4) { i1=0; break; } j=loop(bp,j, y,dx,cs,1,RI); if(j>i1) { i1=j; } j=loop(bp,0,dy-1-y,dx,cs,0,RI); if(j>3*dx/4) { i1=0; break; } j=loop(bp,j,dy-1-y,dx,cs,1,RI); if(j>i3) { i3=j; } } if (i1holes.num != 1) {ad=95*ad/100;} if( num_hole(x0+dx/8,x1-dx/8,y0+dy/8,y1-dy/8,box1->p,cs,NULL) != 1 ) Break; // if( num_hole(x0 ,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 ) Break; ac=(wchar_t) '#'; if( gchar ) {ad=99*ad/100;} Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test bullet, full_box, grabbed cursor, ZapfDingBats_156 if (bc==UNKNOWN) for(ad=96;dx>4 && dy>4 && 2*dx>dy;){ // provisional DBG( wchar_t c_ask='#'; ) if( get_bw(x0,x1,y0,y1,box1->p,cs,2) != 0 ) break; ac=BULLET; if (gchar && !hchar) ad=80*ad/100; Setac(box1,ac,ad); if (ad>=100) return ac; break; } /* --- test | (vertical line, could be a I or l) --- */ for(ad=99;dy>4 && 2*dxp,cs,2) != 0 ) break; /* more unsure if the borders are not exact */ if( get_bw(x0 ,x0+dx/8,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) ad=99*ad/100; if( get_bw(x1-dx/8,x1 ,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) ad=99*ad/100; if( get_bw(x0+dx/8,x1-dx/8,y0 ,y0+dy/8,box1->p,cs,2) != 0 ) ad=99*ad/100; if( get_bw(x0+dx/8,x1-dx/8,y1-dy/8,y1 ,box1->p,cs,2) != 0 ) ad=99*ad/100; if (3*dxm2 && 2*y1< 
box1->m2+ box1->m3) Break; if (box1->m2 && 3*y1< box1->m2+2*box1->m3) ad=95*ad/100; ac='|'; if (!hchar) ad=98*ad/100; if (dx==1 && hchar && gchar) ad=99; /* 2017-07 tables */ Setac(box1,ac,ad); break; } /* --- test | (vertical line, could be a i if gray and small) --- */ for(ad=99;dy>4 && dy<12 && 3*dx<=dy;){ /* 2017-08 2x9,3x9 sslmozFP */ DBG( wchar_t c_ask='i'; ) /* solid small block but grey gap */ /* test if everything is filled black */ if( get_bw(x0+dx-1,x1-dx+1,y0+dy-1,y1-dy+1,box1->p,cs,2) != 0 ) break; /* ToDo: check again with modified cs? */ if (gchar || !hchar) Break; if (dx>3) ad=98; { int x,y,a,gmi=999999,gma=0,gme=0, yma=999999; for (y=0;yp, x0+x, y0+y) & ~0x0F; // 2018-09 ~marker if (agma) { gma=a; yma=y; } gme += a; DBG( IFV fprintf(stderr,"\nDBG i| y=%2d grey=%4d",y,a); ) } DBG( IFV fprintf(stderr,"\nDBG i| yma %2d gmi,me,ma %4d %4d %4d",\ yma,gmi,gme/dy,gma); ) /* most white column */ if (yma>=dy/2 || y0+yma>box1->m2) Break; if (dy*gmanum_ac;i++) /* reduce l|I */ if (box1->wac[i]<100 && ad <= box1->wac[i]) box1->wac[i]--; } Setac(box1,ac,ad); break; } /* --- test | (vertical line with small gap, pipe symbol) --- */ if (box1->num_frames == 2) for (ad=99;dy>6 && 2*dxframe_vol[0]-box1->frame_vol[1]) // 10 + 8 > box1->frame_vol[0]+box1->frame_vol[1] ) Break; if ( box1->frame_vol[0]+box1->frame_vol[1] < 7*(dx*dy)/8 ) Break; /* test if everything is filled black */ if ( get_bw(x0,x1,y0+dy/2+0,y0-dy/2+0,box1->p,cs,1) != 0 && get_bw(x0,x1,y0+dy/2-1,y0-dy/2-1,box1->p,cs,1) != 0 ) Break; /* NOTE(review): both y ranges end at y0-dy/2.., i.e. above y0; y1-dy/2 may have been intended - confirm */ /* more unsure if the borders are not exact */ if (3*dxm2 && 4*y1< box1->m2+3*box1->m3) Break; if (box1->m2 && 4*y1<0*box1->m2+4*box1->m3) ad=95*ad/100; ac='|'; if (!hchar) ad=98*ad/100; Setac(box1,ac,ad); break; } // --- test % --------------------------------------------------- for(ad=100;dx>5 && dy>7;){ // provisional DBG( wchar_t c_ask='%'; ) if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */ if ( box1->num_frames != 1 && box1->num_frames != 3 && 
box1->num_frames != 5 ) break; if (box1->num_frames == 1) { // tiny fat font? Break; if (dx>5) Break; ad=ad*95/100; } if (box1->num_frames >= 3 && box1->frame_vol[1]<0 && box1->frame_vol[2]<0) { // small font? 7x8 /* 12x17 ocr-b .@@@...@ @@.@@.@@ .@@@.@@. ...@@@.. ..@@@... .@@.@@@. @@.@@.@@ @...@@@. */ // aa[] belongs to the frame 0 only! 2010-10 if (aa[0][0]-x0> dx/4 || aa[0][1]-y0> dy/4) Break; // upper left / if (aa[2][0]-x0<3*dx/4 || aa[2][1]-y0<3*dy/4) Break; // lower right / if (aa[1][0]-x0> dx/4 || aa[1][1]-y0<3*dy/4) Break; // lower left / if (aa[3][0]-x0<3*dx/4 || aa[3][1]-y0> dy/4) Break; // upper right / j =nearest_frame_vector(box1,aa[3][3],aa[0][3],x0+dx/2,y0+dy/2); if (box1->frame_vector[j][0]-x0< dx/2 || x1-box1->frame_vector[j][0]< dx/4 || box1->frame_vector[j][1]-y0<=dy/8) Break; // ~8B j =nearest_frame_vector(box1,aa[1][3],aa[2][3],x0+dx/2,y0+dy/2); if (box1->frame_vector[j][0]-x0< dx/4 || x1-box1->frame_vector[j][0]< dx/2 || y1-box1->frame_vector[j][1]<=dy/8) Break; // ~8B if (dx>7 && ad==100) {ad=ad*99/100;MSG({})} // ocr-b for (i1=i2=1,i=1;inum_frames;i++) { // get biggest holes if (i1==i2 && i!=i1) i2=i; if (box1->frame_vol[i ]frame_vol[i2] && i!=i1) i2=i; if (box1->frame_vol[i2]frame_vol[i1]) { j=i1;i1=i2;i2=j; } } if (i1==i2) Break; if (abs(box1->frame_vol[i1])>=box1->frame_vol[0]/8) Break; if (abs(box1->frame_vol[i2])>=box1->frame_vol[0]/8) Break; if (abs(box1->frame_vol[i1] - box1->frame_vol[i2]) // vol=4 >=abs(box1->frame_vol[i1] + box1->frame_vol[i2])/2) Break; // ToDo: check one box above and the other below dy/2 ("u) if (ad==100) {ad=ad*99/100;MSG(fprintf(stderr,"ad= %d",ad);)} } // MSG(fprintf(stderr,"aa2y dy %d %d",aa[2][1]-y0,dy);) if (box1->num_frames == 3 && box1->frame_vol[1]>0) { // small font? 7x8 /* 5x7 vol=4 @@....@ - @@...@@< ....@@. ...@@.. ..@@... .@@.... @@...@@ @....@@<- */ // aa[] belongs to the frame 0 only! 
2010-10 if (aa[0][0]-x0< dx/4 && aa[0][1]-y0< dy/4) Break; // upper left / if (aa[2][0]-x0>3*dx/4 && aa[2][1]-y0>3*dy/4) Break; // lower right / if (aa[1][0]-x0> dx/4 || aa[1][1]-y0<3*dy/4) Break; // lower left / if (aa[3][0]-x0<3*dx/4 || aa[3][1]-y0> dy/4) Break; // upper right / if (dx>7 && ad==100) {ad=ad*99/100;MSG({})} // ocr-b if (box1->frame_vol[1]>=box1->frame_vol[0]) Break; if (abs(box1->frame_vol[1] - box1->frame_vol[2]) // vol=4 >=abs(box1->frame_vol[1] + box1->frame_vol[2])/8) Break; // ToDo: check one box above and the other below dy/2 ("u) if (ad==100) {ad=ad*99/100;MSG(fprintf(stderr,"ad= %d",ad);)} } if (box1->num_frames == 5 && sdata->holes.num == 2 && box1->frame_vol[1]>0) { // big font // aa[] belongs to the frame 0 only! 2010-10 if (aa[0][0]-x0< dx/4 && aa[0][1]-y0< dy/4) Break; // upper left / if (aa[2][0]-x0>3*dx/4 && aa[2][1]-y0>3*dy/4) Break; // lower right / if (aa[1][0]-x0> dx/4 || aa[1][1]-y0<3*dy/4) Break; // lower left / if (aa[3][0]-x0<3*dx/4 || aa[3][1]-y0> dy/4) Break; // upper right / if( num_cross(x0,x1 ,y0+dy/4,y0+dy/4,box1->p,cs) != 3 && num_cross(x0,x1 ,y0+dy/8,y0+dy/8,box1->p,cs) != 3 ) Break; if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) != 3 && num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) != 3 ) Break; if( num_cross(x0,x1, y0, y1,box1->p,cs) < 4 && num_cross(x0+dx/8,x1, y0, y1,box1->p,cs) < 4 && num_cross(x0,x1+dx/4, y0, y1,box1->p,cs) < 4 && dx>7 && dy>15) Break; if (dx>7 && dy>12) { if( num_hole(x0 ,x1 ,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break; if( num_hole(x0+dx/4,x1+dx/4,y0+dy/4,y1,box1->p,cs,NULL) != 1 ) Break; if( num_hole(x0 ,x1+dx/4,y0,y1 ,box1->p,cs,NULL) != 2 ) Break; } else {ad=98*ad/100;MSG({})} } // use box1->p instead of b, because % is a sum of 3 objects + 2 holes if (loop(box1->p,x0,y0 ,dx,cs,0,RI) < loop(box1->p,x0,y0+dy/16+1,dx,cs,0,RI)){ad=96*ad/100;MSG({})} // X if (loop(box1->p,x1,y1 ,dx,cs,0,LE) < loop(box1->p,x1,y1-1-dy/16,dx,cs,0,LE)){ad=96*ad/100;MSG({})} // X for (x=0;xp,cs,2) != 2 ) 
break; } if (x=100) return ac; break; } // --- test Omega --------------------------------------------------- for(ad=d=99;dx>7 && dy>7;){ // min 3x4 DBG( wchar_t c_ask=GREEK_CAPITAL_LETTER_OMEGA; ) if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2 , x0+dx/2,y0+dy/3 , y1-dy/3,box1->p,cs,1) != 0 ) Break; if( num_cross(x0+dx/2,x0+dx/2,y0 , y1-dy/3,box1->p,cs) != 1 ) Break; if( num_cross(x0+dx/3,x1-dx/3,y0 , y0 ,box1->p,cs) != 1 ) // AND if( num_cross(x0+dx/3,x1-dx/3,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break; if( num_cross(x0+dx/3,x1-dx/3,y1 , y1 ,box1->p,cs) != 2 ) // against noise ("rauschen") if( num_cross(x0+dx/3,x1-dx/3,y1-1 , y1-1 ,box1->p,cs) != 2 ) Break; if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; if (sdata->holes.num) Break; // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break; if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<= loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break; if( loop(bp,dx/2,dy-dy/4,x1-x0,cs,0,RI)>dx/4 || loop(bp,dx/2,dy-dy/4,x1-x0,cs,0,LE)>dx/4 ) Break; if( loop(bp,dx/2,3*dy/8,x1-x0,cs,0,RI)dx/8) Break; x=loop(bp,i,dy-1-dy/16,x1-x0,cs,1,RI); i+=x; if(i<3*dx/8 || i>dx/2) Break; x=loop(bp,i,dy-1-dy/16,x1-x0,cs,0,RI); i+=x; if(i5*dx/8) Break; x=loop(bp,i,dy-1-dy/16,x1-x0,cs,1,RI); i+=x; if(i<7*dx/8) Break; /* look for a vertical gap at lower end */ for( x=dx/4;x<3*dx/4;x++ ){ i=loop(bp,x,dy-1,y1-y0,cs,0,UP); if( i>3*dy/4 ) break; } if( x>=3*dx/4 ) Break; if( !hchar ) ad=60*ad/100; bc=GREEK_CAPITAL_LETTER_OMEGA; Setac(box1,bc,ad); break; } return bc; } // -------------------- OCR engine ;) ---------------------------- wchar_t ocr0(struct box *box1, pix *bp, int cs){ // pix p=*(box1->p); int 
i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1; int dx=x1-x0+1,dy=y1-y0+1, /* size */ rx,ry,r1,r2,i1,i2,ad; /* tmp-vars */ // ad,ac will be used in future wchar_t bc = UNKNOWN; // bestletter wchar_t um = SPACE; // modifier '" int hchar; // char is higher than e int gchar; // char has ink lower than m3 int aa[4][4]; /* corner points, see xX, (x,y,dist^2,vector_idx) v0.41 */ ocr0_shared_t sdata; // data used in all subfunctions sdata.box1=box1; sdata.bp=bp; sdata.cs=cs; // --- hchar --- gchar ------------------------- hchar=0;if( y0 < box1->m2-(box1->m2-box1->m1)/2 ) hchar=1; gchar=0;if( // 2018-09 nums1 bad m1..m4 y1 > box1->m3+(box1->m4-box1->m3)/2 ) gchar=1; // if the char is slightly moved down correction can be done if ( y0m2 && y1>box1->m3 && 2*y1m3+box1->m4) // moved if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1; sdata.hchar=hchar; sdata.gchar=gchar; /* search for nearest points to the 4 courners, typical for xX */ /* this is faster as calling nearest_frame_vector 4 times */ /* 2018-09 ToDo: on 5x8 "y" this is a problem bottom right 2 vectors */ aa[0][0]=aa[1][0]=aa[2][0]=aa[3][0]=(x0+x1)/2; /* set to center */ aa[0][1]=aa[1][1]=aa[2][1]=aa[3][1]=(y0+y1)/2; /* set to center */ aa[0][2]=aa[1][2]=aa[2][2]=aa[3][2]=2*sq(128); /* distance to box edges */ aa[0][3]=aa[1][3]=aa[2][3]=aa[3][3]=0; /* vector index */ /* searching for 4 diagonal line ends */ for (i=0;inum_frame_vectors[0];i++) { x=box1->frame_vector[i][0]; /* take a vector */ y=box1->frame_vector[i][1]; /* distance to upper left end, normalized to 128 */ j=0; d=sq((x-x0)*128/dx)+sq((y-y0)*128/dy); // fprintf(stderr," setaa i= %2d xy= %3d %3d d=%5d aa[3]=%2d\n",i,x-x0,y-y0,d,aa[0][3]); if (daa[j-1][3])) // 2018-09 5x8.y { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; } /* distance to upper right end */ j=3; d=sq((x-x1)*128/dx)+sq((y-y0)*128/dy); if (daa[j-1][3])) // 2018-09 5x8.z { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; } } for (i=0;i<16;i++) 
sdata.aa[i/4][i%4]=aa[i/4][i%4]; /* extract number position and size of holes and store in a table * - hole coordinates are relative to box (x-x0,y-y0) */ sdata.holes.num=0; /* set by num_hole(), ToDo18 better .num_frames */ if (box1->num_frames>0) // speedup v0.42 num_hole(x0,x1,y0,y1,box1->p,cs,&sdata.holes); // call once // printf(" num_holes=%d\n",sdata.holes.num); /* after division of two glued chars, boundaries could be wrong, check this first (ToDo: only if a flag set?) */ if (2*y0 < box1->m2+box1->m3) if (box1->m4>box1->m3 && 2*box1->y1>box1->m4+box1->m3){ /* could be a "I" from divided "Ij" or "Ig" */ for(y=(box1->m3+box1->m2)/2;2*ym3+box1->m4;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1)==0 ) break; if(2*ym3+box1->m4) if( get_bw((x0+x1)/2,(x0+x1)/2,y,box1->m4,box1->p,cs,1)==0 ){ /* be sure, ~_ */ if (y>y0) y1=box1->y1=y; } } DBG( IFV fprintf(stderr,"\nDBG L%d (%d,%d): ",__LINE__,box1->x0,box1->y0); ) DBG( IFV out_b(box1,sdata.bp,0,0,dx,dy,160); ) DBG( IFV fprintf(stderr,"# aa[] %d %d %d %d %d %d %d %d" " (4 corners)" " d= %d %d %d %d vi= %d %d %d %d", aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0, aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0, aa[0][2], aa[1][2], aa[2][2], aa[3][2], aa[0][3], aa[1][3], aa[2][3], aa[3][3]);) // thin fonts may have inner and outer vector nearest to corner, 5x8.y DBG( IFV fprintf(stderr,"\n# holes %d gchar=%d hchar=%d",sdata.holes.num, gchar, hchar);) // --- test thin lines - --------------------------------- for( ad=100; 2*dym3-box1->m2 && 3*dx>=4*dy && dx>2; ){ // min 3x3 (small font) DBG( wchar_t c_ask='-'; ) if( get_bw(x0+dx/8+1,x1-dx/8-1,y0+dy/8+((dy>2)?1:0), y1-dy/8-((dy>2)?1:0),box1->p,cs,2)==2 ) break; if( box1->dots ) { Setac(box1,'=',97);break; } if (dx<=2*dy) ad=99*ad/100; if (dx<=3*dy) ad=99*ad/100; if (!box1->m4) ad=96*ad/100; else { if (y1>=box1->m3) { if ( dx<2*dy) ad=98*ad/100; if (2*dx<3*dy) ad=98*ad/100; Setac(box1,'_',ad); break; } } Setac(box1,'-',ad); if (ad>=100) return '-'; break; } // --- test thin 
lines = --------------------------------- for( ad=100; dy>2 && dx>2; ){ // min 3x3 (small font) DBG( wchar_t c_ask='='; ) if (box1->num_frames != 2) break; for( y=y0;yp,cs,1)==1 ) break; if( get_bw(x0+dx/10,x1-dx/10,y ,y ,box1->p,cs,2)==2 ) break; if( get_bw(x0 ,x1 ,(y+y1)/2,(y+y1)/2,box1->p,cs,1)==1 ) break; if( get_bw(x0+dx/10,x1-dx/10,y1 ,y1 ,box1->p,cs,2)==2 ) break; if (2*dx < dy) Break; // 2010-10-08 if (4*dx < 3*dy) ad=ad*99/100; Setac(box1,'=',ad); return '='; } // --- test dots : --------------------------------- for( ad=100; dy>2 && dy>=2*dx; ){ // max 3x3 (small font) DBG( wchar_t c_ask=':'; ) if (box1->num_boxes!=2 || box1->num_subboxes!=0) Break; // check the gap hight for( i1=dy/16;i1p,cs,1)==0 ) break; if (i1>=dy/2) Break; for( i2=dy/16;i2p,cs,1)==0 ) break; if (i2>=dy/2) Break; if (box1->m3 && y1>box1->m3) ad=98*ad/100; // ~; if (box1->m3 && 2*y0> box1->m2+box1->m1) ad=98*ad/100; // ~i if (gchar) ad=99*ad/100; MSG(fprintf(stderr,"gap dot.dy12 %d %d ad %d",i1,i2,ad);) ad=ad-abs(i1-i2)/dy*20; if (abs(i1-dx)>dy/4) Break; // round or quadratic dots? if (abs(i1-dx)>dy/8) ad=98*ad/100; if (abs(i2-dx)>dy/4) Break; // round or quadratic dots? if (abs(i2-dx)>dy/8) ad=98*ad/100; if (box1->dots!=1) ad=96*ad/100; MSG(fprintf(stderr,"gap dot.dy12 %d %d ad %d",i1,i2,ad);) Setac(box1,':',ad); // dx<=3 ad-- if (ad>=100) return ':'; break; } // --- test dots ; --------------------------------- if ( 2*y0> box1->m2+box1->m1 ) // ~i if ( 4*y1>=3*box1->m3+box1->m2 ) // ~: if (box1->num_frames == 2) // 2010-10-08 for( ad=100; dy>5 && dx>1 && 2*dy>3*dx; ){ // max 3x3 (small font) DBG( wchar_t c_ask=';'; ) // better would it be to detect round pixelcluster on top // check high of upper and lower dot for( i1=0;i1p,cs,1)==0 ) break; if (i1>=dy/2) break; for( i2=0;i2p,cs,1)==0 ) break; if (i2m3) ad=97*ad/100; if (i2-i1=100) return ';'; break; } // --- first test small dots . 
--------------------------------- if( 3*dym4-box1->m1+2 && abs(dx-dy)<(dx+dy)/4+2 && 3*y1>=(2*box1->m3+ box1->m2) // dot near baseline? && 5*y0>=(3*box1->m3+2*box1->m2) ){ // Jul00 DBG( wchar_t c_ask='.'; ) d=0; r1=60;r2=140; ad=99; for(x=x0;x<=x1;x++)for(y=y0;y<=y1;y++){ /* circle equation */ rx=100*(2*x-(x0+x1))/dx; // normalize to 15bit number ry=100*(2*y-(y0+y1))/dy; if( rx*rx + ry*ry < r1*r1 ) if( getpixel(box1->p,x,y)>=cs ){ d++;x=x1+1;y=y1+1; } if( rx*rx + ry*ry > r2*r2 ) if( getpixel(box1->p,x,y)< cs ){ d++;x=x1+1;y=y1+1; } // MSG( fprintf(stderr,"x= %3d %3d r= %6d %6d %6d", rx, ry, rx*rx+ry*ry, r1*r1, r2*r2); ) } if (loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI)> dx/8) { ad=98*ad/100; // , if (loop(box1->p,x0,y1 ,x1-x0,cs,0,RI)<=dx/8) ad=98*ad/100; } // , MSG( fprintf(stderr,"d= %3d ad= %3d", d, ad); ) if(d==0) // 2018-09 adding 12.5% (1/8) tolerance for handwritten if( loop(box1->p,x0,y0+dy/8,x1-x0,cs,0,RI) // top left vs. komma? <= loop(box1->p,x0,y1-dy/8,x1-x0,cs,0,RI)+dx/8 // bottom left || loop(box1->p,x1,y0+dy/8,x1-x0,cs,0,LE) // top right >= loop(box1->p,x1,y1-dy/8,x1-x0,cs,0,LE)-dx/8 ) // bottom right { bc='.'; if (box1->dots) { Setac(box1,':',ad); ad=98*ad/100; } Setac(box1,bc,ad); } } // --- first test small dots , --------------------------------- if( 3*dy<2*(box1->m4-box1->m1) && 2*y0> box1->m2+box1->m3 && (2*dx<3*dy || get_bw(0,dx/2,dy/2,dy-1,bp,cs,1)==0) && box1->dots==0 ){ // ocr-a-, DBG( wchar_t c_ask=','; ) ad=100; bc=','; if (dy==1 && dx==1) ad=98*ad/100; if (dy==2 && dx==1) ad=99*ad/100; // this is a problem case if (dx>=dy) ad=99*ad/100; if (2*dy >= box1->m4-box1->m1) {ad=98*ad/100;MSG(fprintf(stderr,"ad= %d",ad);)} if( loop(box1->p,x0,y0,x1-x0,cs,0,RI) /* simple line */ > loop(box1->p,x0,y1,x1-x0,cs,0,RI) && loop(box1->p,x1,y0,x1-x0,cs,0,LE) < loop(box1->p,x1,y1,x1-x0,cs,0,LE) ) { ad=99*ad/100; } else { /* with upper filled circle or box */ if( loop(box1->p,x0,(y0+y1+1)/2,x1-x0,cs,0,RI)p,x1, y1 
,x1-x0,cs,0,LE)p,x0,y1-((dy>5)?1:0),x1-x0,cs,0,LE)>(dx+1)/2 ) // if( loop(box1->p,x0, y1 ,x1-x0,cs,0,LE)>(dx+1)/2 ) // { ad=96*ad/100; MSG(fprintf(stderr,"ad= %d",ad);) } } // if(box1->dots==1) { Setac(box1,';',ad); ad=99*ad/100; } Setac(box1,bc,ad); } // --- first test small dots ''""`` (quotation)-------------- if (// 2*dy < box1->m4 -box1->m1+1 // failed for ocr-b dy=8 bad_m4-m1=13 dy < box1->m3 - box1->m2 // should work always 2010-10-08 && 2*y0 < box1->m2 + box1->m3 && 3*y1 < box1->m2+2*box1->m3+2 ){ DBG( wchar_t c_ask='\''; ) ad=100; bc='\''; if (3*y1>= box1->m2+2*box1->m3) { ad=96*ad/100; MSG({}) } if (2*y1>= box1->m2+ box1->m3) { ad=99*ad/100; MSG({}) } if (box1->num_frames>1) { // ~! if (2*y1>= box1->m2+ box1->m3) { ad=96*ad/100; MSG({}) } // ~! if (3*y1>=2*box1->m2+ box1->m3) { ad=96*ad/100; MSG({}) } if (get_bw(x0,x1,(box1->m2+3*box1->m3)/4,box1->m4,box1->p,cs,1)!=0) { ad=98*ad/100; MSG({}) } } if (dx>4 && num_cross(x0,x1,(y0+3*y1)/4,(y0+3*y1)/4,box1->p,cs) == 2) { // " " DBG( c_ask='\"'; ) MSG(fprintf(stderr,"double quote detected");) bc='\"'; // ocr-a-" has no gap! 
if ( get_bw((x0+x1)/2,(x0+x1)/2,y0,y1,box1->p,cs,1)!=0 ) ad=96*ad/100; } else { if ( num_cross(x0,x1, y0 , y0 ,box1->p,cs)!=1) ad=96*ad/100; if ( num_cross(x0,x1,(y0+y1)/2,(y0+y1)/2,box1->p,cs)!=1) ad=98*ad/100; if (dx>dy) { ad=96*ad/100; MSG({}) } } if ( num_cross(x1,x1, y0 , y1 ,box1->p,cs)!=1 && num_cross(x0,x0, y0 , y1 ,box1->p,cs)!=1) ad=99*ad/100; if ( num_cross(x0,x1, y0+dy/4 , y0+dy/4 ,box1->p,cs)>2) ad=97*ad/100; if ( num_cross(x0,x1, y1-dy/4 , y1-dy/4 ,box1->p,cs)>2) ad=97*ad/100; // * 5x8font if ( loop(box1->p,x0,y0,dx,cs,0,RI)==0 && loop(box1->p,x0,y1,dx,cs,0,RI)> 0 && loop(box1->p,x1,y0,dx,cs,0,LE)> 0 && loop(box1->p,x1,y1,dx,cs,0,LE)==0) bc='`'; // 2010-10-08 qemu if (2*y0 > box1->m1+box1->m2) ad=99*ad/100; Setac(box1,bc,ad); if (ad>=100) return bc; } // --- first double dots ,, (quotation) -------------- 2010-10-01 if( 2*dy < box1->m4 -box1->m1+1 && 2*y0 >= box1->m2 +box1->m3 && y1 <= box1->m4+1 && box1->num_frames == 2 // ToDo: quotation without gap??? && box1->num_boxes == 2 && box1->num_subboxes == 0 && sdata.holes.num == 0 ){ DBG( wchar_t c_ask='"'; ) ad=100; bc='"'; if (2*y1 >= box1->m4) { ad=98*ad/100; MSG({}) } if (4*y0 <= box1->m2 + 3*box1->m3) { ad=99*ad/100; MSG({}) } if ( num_cross(x0,x1,(y0+3*y1)/4,(y0+3*y1)/4,box1->p,cs) != 2) ad=90*ad/100; if ( num_cross(x1,x1, y0 , y1 ,box1->p,cs)!=1) ad=99*ad/100; if ( num_cross(x0,x1, y0+dy/4 , y0+dy/4 ,box1->p,cs)>2) ad=97*ad/100; if ( num_cross(x0,x1, y1-dy/4 , y1-dy/4 ,box1->p,cs)>2) ad=97*ad/100; // * 5x8font if (2*y1 > box1->m3+box1->m4) ad=99*ad/100; Setac(box1, DOUBLE_LOW_9_QUOTATION_MARK, ad); if (ad>=100) return bc; } // --- TILDE ~ --------------------------------- if( 2*dym4-box1->m1 && dx>=dy && dx>3 && dy>1 && 2*y0< box1->m1+box1->m2 && 3*y1<2*box1->m2+box1->m3 ){ if( loop(box1->p,x0,y0,dx,cs,0,RI) > loop(box1->p,x0,y1,dx,cs,0,RI) && loop(box1->p,x1,y0,dx,cs,0,LE) < loop(box1->p,x1,y1,dx,cs,0,LE) && num_cross(x0,x1,y0,y0,box1->p,cs) == 2 && num_cross(x0,x1,y1,y1,box1->p,cs) == 2 ) { DBG( 
wchar_t c_ask='~'; ) bc=TILDE; Setac(box1,bc,99); } } // --- CIRCUMFLEX, hat ^ --------------------------------- if( 2*dym4-box1->m1 && dx>=dy && dx>2 && dy>1 && 2*y0< box1->m1+box1->m2 && 3*y1<2*box1->m2+box1->m3 && box1->num_frames == 1 // ToDo: 2010-10-01 what about \^O ??? && box1->num_boxes == 1 && box1->num_subboxes == 0 && sdata.holes.num == 0 ){ DBG( wchar_t c_ask='^'; ) if( ( loop(box1->p,x0,y0 ,dx,cs,0,RI) > loop(box1->p,x0,y1 ,dx,cs,0,RI)-dx/8 || loop(box1->p,x0,y0 ,dx,cs,0,RI) > loop(box1->p,x0,y1-1,dx,cs,0,RI)-dx/8 ) && ( loop(box1->p,x1,y0 ,dx,cs,0,LE) > loop(box1->p,x1,y1 ,dx,cs,0,LE)-dx/8 || loop(box1->p,x1,y0 ,dx,cs,0,LE) > loop(box1->p,x1,y1-1,dx,cs,0,LE)-dx/8 ) && num_cross(x0,x1,y0 ,y0 ,box1->p,cs) == 1 && ( num_cross(x0,x1,y1 ,y1 ,box1->p,cs) == 2 || num_cross(x0,x1,y1-1,y1-1,box1->p,cs) == 2 )) { bc='^'; Setac(box1,bc,99); } } // ------------------------------------------------------ // if( dots==1 ){ um='\''; } #if 0 /* ToDo: change to vectors, call here or in whatletter */ if (box1->dots==0) { // i-dots ??? (if dots==0 is wrong) y=box1->m1; for(;yp,cs,1)==1) break; { i1=y; if( yp,cs,1)==0) break; if( ybox1->m2-box1->m1){ testumlaut(box1,cs,2,&um); // set modifier + new y0 ??? } } } #else um = box1->modifier; #endif if ( /* um==ACUTE_ACCENT || */ um==DIAERESIS){ for(y=y1;y>y0;y--) if( get_bw(x0,x1,y,y,box1->p,cs,1)==0) { y0=y; dy=y1-y0+1; break; } // scan "a "o "u } // --- test numbers 0..9 --- separated for faster compilation if( OCR_JOB->cfg.only_numbers ) return ocr0n(&sdata); // bc=ocr1(box1,bp,cs); if(bc!=UNKNOWN && box1->num_ac>0 && box1->wac[0]==100) return bc; // for fast compilable tests // ------ separated for faster compilation // ToDo: inser ocr0_shared_t here and split into a,b,cC,d,e,f,g9,... 
#define IF_NOT_SURE if(bc==UNKNOWN || box1->num_ac==0 || box1->wac[0]<100) IF_NOT_SURE bc=ocr0_eE(&sdata); IF_NOT_SURE bc=ocr0_f(&sdata); IF_NOT_SURE bc=ocr0_bB(&sdata); IF_NOT_SURE bc=ocr0_dD(&sdata); IF_NOT_SURE bc=ocr0_F(&sdata); IF_NOT_SURE bc=ocr0_uU(&sdata); IF_NOT_SURE bc=ocr0_micro(&sdata); IF_NOT_SURE bc=ocr0_vV(&sdata); IF_NOT_SURE bc=ocr0_rR(&sdata); IF_NOT_SURE bc=ocr0_m(&sdata); IF_NOT_SURE bc=ocr0_tT(&sdata); IF_NOT_SURE bc=ocr0_sS(&sdata); IF_NOT_SURE bc=ocr0_gG(&sdata); IF_NOT_SURE bc=ocr0_xX(&sdata); IF_NOT_SURE bc=ocr0_yY(&sdata); IF_NOT_SURE bc=ocr0_zZ(&sdata); IF_NOT_SURE bc=ocr0_wW(&sdata); IF_NOT_SURE bc=ocr0_aA(&sdata); IF_NOT_SURE bc=ocr0_cC(&sdata); IF_NOT_SURE bc=ocr0_lL(&sdata); IF_NOT_SURE bc=ocr0_oO(&sdata); IF_NOT_SURE bc=ocr0_pP(&sdata); IF_NOT_SURE bc=ocr0_qQ(&sdata); IF_NOT_SURE bc=ocr0_iIjJ(&sdata); IF_NOT_SURE bc=ocr0_n(&sdata); IF_NOT_SURE bc=ocr0_M(&sdata); IF_NOT_SURE bc=ocr0_N(&sdata); IF_NOT_SURE bc=ocr0_h(&sdata); IF_NOT_SURE bc=ocr0_H(&sdata); IF_NOT_SURE bc=ocr0_k(&sdata); IF_NOT_SURE bc=ocr0_K(&sdata); IF_NOT_SURE bc=ocr0n(&sdata); IF_NOT_SURE bc=ocr0_brackets(&sdata); IF_NOT_SURE bc=ocr0p9(&sdata); IF_NOT_SURE bc=ocr0px(&sdata); if(box1->num_ac==0 && bc!=UNKNOWN) fprintf(stderr,""); if(box1->num_ac>0 && box1->wac[0]>95) box1->c=bc=box1->tac[0]; /* will be removed later, only fix old things */ for (i=0;inum_ac;i++) if (box1->tac[i]==bc) { bc=box1->tac[0]; } return bc; }