1 /* GNU Ocrad - Optical Character Recognition program
2 Copyright (C) 2003-2019 Antonio Diaz Diaz.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <algorithm>
19 #include <cstdio>
20 #include <cstdlib>
21 #include <vector>
22 #include <stdint.h>
23
24 #include "common.h"
25 #include "rectangle.h"
26 #include "segment.h"
27 #include "ucs.h"
28 #include "bitmap.h"
29 #include "blob.h"
30 #include "profile.h"
31 #include "feats.h"
32
33
// Looks for three black sections in column hcenter() ± n, then tests if
// upper and lower gaps are open to the right or to the left
//
test_235Esz(const Charset & charset) const37 int Features::test_235Esz( const Charset & charset ) const
38 {
39 const int csize = 3;
40 const int ucoff[csize] = { 0, -1, +1 };
41 const int lcoff[3*csize] = { 0, -1, +1, -1, 0, +1, +1, 0, -1 };
42
43 if( b.width() < 9 || b.height() > 3 * b.width() ||
44 bp.minima( b.height() / 2 ) > 1 ) return 0;
45
46 const int noise = ( std::min( b.height(), b.width() ) / 15 ) + 1;
47 int lrow1 = 0, urow2 = 0, lrow2 = 0, urow3 = 0;
48 int lcol1 = 0, ucol2 = 0, lcol2 = 0, ucol3 = 0;
49 bool done = false;
50
51 for( int i = 0; i < csize && !done; ++i )
52 {
53 const int ucol = b.hcenter() + ( noise * ucoff[i] );
54 int row = b.top() + tp[ucol-b.left()];
55 while( ++row < b.bottom() && b.get_bit( row, ucol ) ) ;
56 if( row <= b.vpos( 30 ) ) { lrow1 = row; lcol1 = ucol; } else continue;
57 while( ++row < b.bottom() && !b.get_bit( row, ucol ) ) ;
58 if( row < b.bottom() )
59 {
60 urow2 = row - 1; ucol2 = ucol;
61 for( int j = 0; j < csize && !done; ++j )
62 {
63 row = urow2 + 1;
64 const int lcol = b.hcenter() + ( noise * lcoff[(csize*i)+j] );
65 if( ucol != lcol )
66 {
67 const int d = ( ucol > lcol ) ? +1 : -1;
68 int c = lcol; while( c != ucol && b.get_bit( row, c ) ) c += d;
69 if( c != ucol ) continue;
70 }
71 while( ++row < b.bottom() && b.get_bit( row, lcol ) ) ;
72 if( row < b.bottom() ) { lrow2 = row; lcol2 = lcol; } else continue;
73 while( ++row <= b.bottom() && !b.get_bit( row, lcol ) ) ;
74 if( row <= b.bottom() && row > b.vpos( 70 ) )
75 { urow3 = row - 1; ucol3 = lcol; done = true; }
76 }
77 }
78 }
79 if( !done ) return 0;
80
81 const bool bopen = b.escape_bottom( urow3, ucol3 );
82 const bool topen = b.escape_top( lrow1, lcol1 );
83 const bool tbopen = bopen && topen;
84 const int ascode = ( b.get_bit( b.vcenter(), b.hcenter() ) ) ? '*' : 0;
85 if( b.escape_left( lrow2, lcol2 ) )
86 {
87 if( b.escape_left( urow2, ucol2 ) )
88 {
89 if( tbopen ) return ascode;
90 if( !bopen && !topen && b.height() <= 3 * b.width() )
91 {
92 const int lm = lp.minima(), rm = rp.minima();
93 if( ( lm == 3 || lm == 2 ) &&
94 ( rm == 2 || ( rm == 1 && rp.iminimum() < rp.pos( 80 ) ) ) )
95 return '3';
96 }
97 }
98 else if( b.escape_right( urow2, ucol2 ) )
99 {
100 if( tbopen ) return ascode;
101 if( rp[lrow1 + 1 - b.top()] >= lcol1 - b.left() &&
102 ( lp[lrow2 + 1 - b.top()] < lcol2 - b.left() ||
103 lp[urow3 - 1 - b.top()] < ucol3 - b.left() ) )
104 {
105 for( int i = lp.pos( 40 ); i <= lp.pos( 70 ); ++i )
106 if( 5 * lp[i] < b.width() && 2 * lp[i+1] > b.width() ) return '5';
107 int c = 0, hdiff;
108 if( !b.top_hook( &hdiff ) || 5 * hdiff >= 4 * b.height() ) ++c;
109 if( 2 * lp[lrow2 - b.top()] < lcol2 - b.left() ) ++c;
110 if( !tp.isconvex() || ( !tp.ispit() && bp.ispit() ) ) ++c;
111 if( c >= 2 ) return '5';
112 }
113 if( charset.enabled( Charset::iso_8859_15 ) ||
114 charset.enabled( Charset::iso_8859_9 ) )
115 if( urow2 > b.vpos( 55 ) &&
116 b.seek_right( urow2 - 1, ucol2 ) < b.right() )
117 { if( urow2 > b.vpos( 63 ) ) return UCS::CCCEDI;
118 else return UCS::SCCEDI; }
119 return 's';
120 }
121 }
122 else if( b.escape_right( lrow2, lcol2 ) )
123 {
124 if( b.escape_right( urow2, ucol2 ) )
125 {
126 if( tbopen ) return ascode;
127 if( bp.minima( b.height() / 5 ) == 1 )
128 {
129 if( 8 * lp[((lrow2+urow3)/2)-b.top()] >= b.width() &&
130 b.escape_top( ( lrow1 + urow2 ) / 2, b.left() ) &&
131 !b.escape_top( ( lrow2 + urow3 ) / 2, b.left() ) ) return 'f';
132 if( rp.minima( b.width() / 8 ) < 3 && b.escape_bottom( urow3, ucol3 ) )
133 {
134 if( charset.enabled( Charset::iso_8859_15 ) ||
135 charset.enabled( Charset::iso_8859_9 ) )
136 if( 2 * lp[lp.pos(95)] > rp[rp.pos(95)] )
137 { if( urow2 > b.vpos( 63 ) ) return UCS::CCCEDI;
138 else return UCS::SCCEDI; }
139 return 'F';
140 }
141 else if( lrow1 < urow2 && urow2 + 2 < lrow2 && lrow2 < urow3 &&
142 urow2 <= b.vcenter() && lrow2 >= b.vcenter() ) return 'E';
143 }
144 }
145 else if( b.escape_left( urow2, ucol2 ) )
146 {
147 if( !tbopen && ( 2 * lp[lp.pos(50)] ) + 2 >= b.width() &&
148 ( tp.isconvex() || ( (tp.ispit() || tp.isrtip()) && !bp.ispit() ) ) )
149 return '2';
150 if( 2 * b.height() <= 5 * wp.max() && bp[bp.pos(75)] <= b.height() / 10 &&
151 Ocrad::similar( wp.max( 0, wp.pos(30) ), wp.max( wp.pos(70) ), 20 ) )
152 return 'z';
153 }
154 }
155 return 0;
156 }
157
158
test_EFIJLlT(const Charset & charset,const Rectangle & charbox) const159 int Features::test_EFIJLlT( const Charset & charset, const Rectangle & charbox ) const
160 {
161 if( tp.minima( b.height() / 4 ) != 1 || bp.minima( b.height() / 4 ) != 1 )
162 return 0;
163
164 const int noise = ( std::min( b.height(), b.width() ) / 30 ) + 1;
165 {
166 const bool maybe_j = ( 2 * ( lp[lp.pos(50)] + noise ) >= b.width() );
167 const int col = b.hpos( maybe_j ? 25 : 75 );
168 int row = b.seek_top( b.vcenter(), col );
169 if( row <= b.top() || ( row < b.vpos( 25 ) && b.escape_top( row, col ) ) )
170 {
171 int hdiff;
172 if( b.bottom_hook( &hdiff ) )
173 {
174 if( maybe_j && hdiff > b.height() / 2 &&
175 rp.increasing( rp.pos( 80 ), 1 ) && !rp.decreasing() ) return 'J';
176 if( !maybe_j && -hdiff > b.height() / 2 )
177 {
178 if( 5 * lp[lp.pos(80)] >= 2 * b.width() ) return 'v'; // broken 'v'
179 if( col > b.hcenter() ) return 'L';
180 }
181 }
182 }
183 }
184
185 const int vnoise = ( b.height() / 30 ) + 1;
186 const int topmax = b.top() + vnoise;
187 const int botmin = b.bottom() - vnoise;
188 if( vbars() == 1 && vbar(0).width() >= 2 && 2 * vbar(0).width() <= b.width() )
189 {
190 if( std::abs( vbar(0).hcenter() - b.hcenter() ) <= noise &&
191 std::abs( (vbar(0).left() - b.left()) - (b.right() - vbar(0).right()) ) <= 2 * noise )
192 {
193 if( hbars() == 1 && 4 * hbar(0).height() <= b.height() )
194 {
195 if( ( hbar(0).top() <= topmax || hbar(0).bottom() < b.vpos( 15 ) ) &&
196 hbar(0).width() >= wp[wp.pos(75)] + wp[wp.pos(80)] &&
197 4 * lp[lp.pos(50)] >= b.width() )
198 return 'T';
199 if( std::abs( hbar(0).vcenter() - b.vcenter() ) <= vnoise &&
200 hbar(0).width() >= b.width() &&
201 Ocrad::similar( b.height(), b.width(), 50 ) ) return '+';
202 }
203 if( hbars() == 2 &&
204 hbar(0).top() <= topmax && 4 * hbar(0).height() <= b.height() &&
205 hbar(1).bottom() >= botmin && 4 * hbar(1).height() <= b.height() &&
206 3 * hbar(0).width() > 4 * hbar(1).width() )
207 return 'T';
208 }
209 }
210
211 if( vbars() == 1 && vbar(0).width() >= 2 )
212 {
213 if( 2 * vbar(0).width() <= b.width() && vbar(0).right() <= b.hcenter() )
214 {
215 if( ( hbars() == 2 || hbars() == 3 ) && hbar(0).top() <= topmax &&
216 hbar(0).width() + 1 >= hbar(1).width() &&
217 2 * hbar(1).width() >= 3 * vbar(0).width() &&
218 vbar(0).h_overlaps( hbar(1) ) )
219 {
220 if( hbars() == 3 &&
221 Ocrad::similar( hbar(0).width(), hbar(2).width(), 10, 2 ) &&
222 10 * hbar(2).width() >= 9 * hbar(1).width() &&
223 hbar(0).left() <= hbar(1).left() + 1 )
224 return 'E';
225 if( ( hbars() == 2 || hbar(0).width() > hbar(2).width() ) &&
226 ( hbar(1).includes_vcenter( b ) ||
227 ( 3 * hbar(1).width() > 2 * hbar(0).width() &&
228 10 * lp[vnoise] < b.width() && hbar(1).top() > b.vpos( 30 ) &&
229 hbar(1).bottom() < b.vpos( 60 ) ) ) )
230 return 'F';
231 }
232 if( hbars() == 2 && hbar(1).bottom() >= botmin &&
233 b.height() > b.width() && hbar(1).width() > hbar(0).width() &&
234 std::abs( vbar(0).hcenter() - hbar(0).hcenter() ) <= 1 &&
235 rp.iminimum() > rp.pos( 70 ) )
236 return 'L';
237 if( hbars() == 1 && Ocrad::similar( hbar(0).width(), b.width(), 10 ) &&
238 vbar(0).left() <= b.hpos( 30 ) )
239 {
240 if( hbar(0).bottom() >= botmin &&
241 b.escape_top( b.vcenter(), b.hpos( 75 ) ) )
242 return 'L';
243 if( hbar(0).top() <= topmax && 2 * wp[wp.pos(50)] >= b.width() &&
244 4 * wp[wp.pos(75)] < b.width() &&
245 b.escape_right( b.vpos( 25 ), b.hcenter() ) )
246 return 'F';
247 }
248 }
249
250 if( 3 * vbar(0).width() < 2 * b.width() && vbar(0).left() > b.hpos( 33 ) &&
251 hbars() == 1 )
252 {
253 if( vbar(0).right() >= b.hpos( 90 ) && hbar(0).bottom() >= botmin &&
254 hbar(0).left() == b.left() &&
255 b.bottom() > charbox.vpos( 90 ) &&
256 b.escape_top( b.vcenter(), b.hpos( 25 ) ) )
257 { if( b.height() > b.width() ) return 'J'; else return 0; }
258 if( hbar(0).top() <= topmax && hbar(0).width() + 1 >= b.width() &&
259 b.width() > b.height() )
260 {
261 if( charset.enabled( Charset::iso_8859_15 ) ||
262 charset.enabled( Charset::iso_8859_9 ) ) return UCS::NOT;
263 return 0;
264 }
265 }
266 }
267
268 if( vbars() == 1 && vbar(0).width() >= 2 &&
269 tp.minima() == 1 && bp.minima() == 1 )
270 {
271 if( 3 * b.height() > 4 * b.width() &&
272 Ocrad::similar( vbar(0).left() - b.left(),
273 b.right() - vbar(0).right(), 30, 2 * noise ) )
274 {
275 if( b.height() <= 3 * wp.max() && rp.istip() && lp.istip() )
276 {
277 if( b.height() <= 3 * b.width() &&
278 lp[lp.pos(40)] > lp[lp.pos(60)] + noise &&
279 rp[rp.pos(60)] > rp[rp.pos(40)] + noise ) return 'z';
280 return 'I';
281 }
282 if( rp.isflats() &&
283 ( lp.istip() || lp.isflats() ||
284 ( lp.isctip() && lp.minima() == 2 &&
285 lp.iminimum() < lp.pos( 30 ) && lp.iminimum(1) > lp.pos( 80 ) ) ) )
286 return 'l';
287 if( b.height() > 3 * wp.max() )
288 {
289 if( rp.istip() && lp.ispit() && Ocrad::similar( lp.iminimum(), lp.pos( 50 ), 10 ) )
290 { if( lp.istpit() ) return '{'; else return '('; }
291 if( lp.istip() && rp.ispit() && Ocrad::similar( rp.iminimum(), rp.pos( 50 ), 10 ) )
292 { if( rp.istpit() ) return '}'; else return ')'; }
293 if( rp.isflats() && 2 * vbar(0).size() >= b.area() ) return 'l';
294 }
295 if( 2 * b.height() > 3 * b.width() && lp.minima() <= 2 )
296 if( rp.isflats() || rp.minima() == 1 )
297 if( vbar(0).right() >= b.hpos( 70 ) ||
298 b.escape_top( b.vpos( 75 ), std::min( b.right(), vbar(0).right() + 1 ) ) )
299 for( int i = vbar(0).left() - 1; i > b.left(); --i )
300 if( b.seek_bottom( b.vpos( 75 ), i ) < b.bottom() &&
301 bp[i-b.left()] <= noise ) return 'l';
302 }
303 if( vbar(0).right() >= b.right() - 1 )
304 {
305 if( lp.istip() && b.height() > 2 * b.width() )
306 {
307 if( 2 * vbar(0).width() <= wp.max() &&
308 lp[lp.pos(50)] >= b.width() / 2 ) return ']';
309 if( b.height() >= 3 * b.width() ) return 'l';
310 }
311 if( 2 * b.height() >= 3 * b.width() &&
312 vbar(0).height() >= 3 * vbar(0).width() &&
313 lp.istpit() && lp.minima() == 1 )
314 { const int i = lp.iminimum();
315 if( i > lp.pos( 10 ) && i < lp.pos( 40 ) ) return '1'; }
316 }
317 }
318 if( hbars() == 1 && hbar(0).width() >= b.width() &&
319 std::abs( hbar(0).vcenter() - b.vcenter() ) <= vnoise &&
320 Ocrad::similar( b.height(), b.width(), 50 ) &&
321 tp.isupit() && bp.isupit() )
322 return '+';
323 return 0;
324 }
325
326
test_c() const327 int Features::test_c() const
328 {
329 if( lp.isconvex() || lp.ispit() )
330 {
331 int urow = b.seek_top( b.vcenter(), b.hcenter() );
332 int lrow = b.seek_bottom( b.vcenter(), b.hcenter() );
333
334 if( b.height() > 2 * b.width() && 3 * wp.max() <= 2 * b.width() )
335 { if( lp.isconvex() ) return '('; else return 0; }
336
337 if( urow > b.top() && lrow < b.bottom() && rp.isctip() &&
338 ( bp.ispit() || tp.ispit() || ( bp.isltip() && tp.isltip() ) ) &&
339 b.escape_right( b.vcenter(), b.hcenter() ) )
340 return 'c';
341 }
342
343 if( b.height() > 2 * b.width() && rp.isconvex() )
344 {
345 int urow = b.seek_top( b.vcenter(), b.hcenter() );
346 int lrow = b.seek_bottom( b.vcenter(), b.hcenter() );
347
348 if( 3 * wp.max() <= 2 * b.width() ||
349 ( 2 * lp[urow-b.top()] >= b.width() && 2 * lp[lrow-b.top()] >= b.width() ) )
350 return ')';
351 }
352
353 return 0;
354 }
355
356
test_frst(const Rectangle & charbox) const357 int Features::test_frst( const Rectangle & charbox ) const
358 {
359 if( bp.minima( b.height() / 4 ) != 1 || tp.minima( b.height() / 2 ) != 1 ||
360 bp.minima( b.height() / 2 ) != 1 ) return 0;
361 const int noise = ( std::min( b.height(), b.width() ) / 30 ) + 1;
362 const bool maybe_slanted_r = ( tp.minima( b.height() / 4 ) != 1 );
363 bool maybe_t = true;
364
365 if( !maybe_slanted_r )
366 {
367 int b_hdiff = 0, t_hdiff = 0;
368 if( b.bottom_hook( &b_hdiff ) )
369 {
370 if( -2 * b_hdiff > b.height() )
371 {
372 if( b.height() >= 3 * wp.max() && !lp.ispit() &&
373 ( hbars() == 0 || hbar(0).bottom() < b.vpos( 20 ) ) ) return 'l';
374 if( 2 * wp[wp.pos(6)] < b.width() && hbars() >= 1 && hbars() <= 2 &&
375 hbar(0).top() >= b.vpos( 15 ) && hbar(0).bottom() < b.vcenter() &&
376 Ocrad::similar( hbar(0).width(), wp.max(), 10 ) ) return 't';
377 }
378 }
379 if( b.top_hook( &t_hdiff ) )
380 {
381 if( 3 * t_hdiff > 2 * b.height() && b.height() > 2 * wp.max() &&
382 tp.iminimum() > tp.pos( 50 ) && bp.iminimum() <= bp.pos( 50 ) &&
383 ( !b_hdiff || rp.increasing( rp.pos( 50 ) ) ) )
384 return 'f';
385 if( 2 * b_hdiff > b.height() && 2 * t_hdiff > b.height() )
386 return 0; // recognized 's' or SCCEDI
387 maybe_t = false;
388 }
389 }
390
391 if( 2 * rp[rp.pos(50)] > b.width() &&
392 2 * bp[bp.pos(50)] > b.height() && tp.isctip() ) return 'r';
393
394 if( maybe_slanted_r || vbars() != 1 || vbar(0).width() < 2 ) return 0;
395 if( vbar(0).hcenter() <= b.hcenter() )
396 {
397 const int col = b.right() - rp[rp.pos(50)] + 2;
398 if( col < b.right() )
399 {
400 const int row = b.seek_bottom( b.vcenter(), col );
401 if( row >= b.bottom() || b.escape_bottom( row - 1, col ) )
402 {
403 if( rp.minima() == 3 )
404 { if( rp.minima( b.width() / 8 ) < 3 ) return 'f'; else return 0; }
405 if( Ocrad::similar( b.height(), b.width(), 40 ) )
406 {
407 if( tp.minima( b.height() / 8 ) == 2 &&
408 bp.minima( b.height() / 8 ) == 2 ) return 'x';
409 int row2 = b.vpos( 75 );
410 int col2 = b.seek_right( row2, b.hcenter(), false ) + 1;
411 if( b.seek_right( row2, col2 ) >= b.right() )
412 {
413 if( lp.isconvex() && ( col > b.hpos( 60 ) || row < b.bottom() ) )
414 return 0;
415 if( ( hbars() == 1 ||
416 ( hbars() == 2 && hbar(1).bottom() >= b.bottom() - 1 &&
417 2 * hbar(0).width() > 3 * hbar(1).width() ) ) &&
418 hbar(0).top() <= b.top() + 1 &&
419 4 * hbar(0).height() <= b.height() &&
420 4 * lp[lp.pos(50)] >= b.width() )
421 return 'T';
422 if( 3 * rp[rp.pos(50)] > b.width() ) return 'r';
423 return 0;
424 }
425 }
426 }
427 if( Ocrad::similar( b.height(), b.width(), 40 ) &&
428 segments_in_row( b.vpos( 15 ) ) == 3 &&
429 segments_in_row( b.vpos( 85 ) ) == 3 &&
430 b.seek_right( row - 1, col ) < b.right() && lp.isctip() )
431 return 'x';
432 }
433 if( 3 * b.height() > 4 * b.width() && vbar(0).left() > b.left() &&
434 rp.minima() <= 2 )
435 {
436 const int col = b.right() - std::max( 0, rp[rp.pos(50)] - 1 );
437 if( !b.escape_bottom( b.vcenter(), col ) )
438 {
439 if( 3 * wp[wp.pos(6)] < 2 * b.width() && tp.ispit() &&
440 lp.iminimum() < lp.pos( 40 ) ) return 't';
441 else return 0;
442 }
443 else if( 2 * wp.max() > b.width() )
444 {
445 if( rp.iminimum() < rp.pos( 20 ) )
446 {
447 if( rp.increasing( rp.pos( 20 ) ) || bp.increasing() ||
448 tp.minima( noise ) == 2 ||
449 ( rp.minima() == 1 && ( b.height() < charbox.height() || tp.iminimum() > tp.pos( 50 ) ) ) )
450 { if( b.height() <= 3 * wp.max() ) return 'r'; else return 0; }
451 else if( 3 * b.height() >= 5 * b.width() && !rp.istip() ) return 'f';
452 }
453 else
454 {
455 if( maybe_t && !rp.isconvex() && bp.minima( b.height() / 3 ) == 1 )
456 return 't';
457 else return 0;
458 }
459 }
460 }
461 if( b.seek_bottom( b.vcenter(), b.hpos( 60 ) + 1 ) >= b.bottom() )
462 { if( rp.minima() == 2 ) return 'f'; else return 'r'; }
463 if( vbar(0).right() <= b.hcenter() && hbars() == 1 &&
464 hbar(0).bottom() >= b.bottom() - 1 && lp.istip() && rp.istip() &&
465 !b.escape_top( b.vcenter(), b.hpos( 75 ) ) )
466 return 'r';
467 }
468 return 0;
469 }
470
471
test_G() const472 int Features::test_G() const
473 {
474 if( lp.isconvex() || lp.ispit() )
475 {
476 int col = 0, row = 0;
477 for( int i = rp.pos( 60 ); i >= rp.pos( 30 ); --i )
478 if( rp[i] > col ) { col = rp[i]; row = i; }
479 if( col == 0 ) return 0;
480 row += b.top(); col = b.right() - col + 1;
481 if( col <= b.left() || col >= b.hcenter() ) return 0;
482
483 col = ( col + b.hcenter() ) / 2;
484 row = b.seek_bottom( row, col );
485 if( row < b.bottom() && b.escape_right( row, col ) &&
486 !b.escape_bottom( row, b.hcenter() ) )
487 {
488 const int noise = std::max( 2, b.height() / 20 );
489 int lrow, urow;
490 for( lrow = row - 1 ; lrow > b.top(); --lrow )
491 if( b.seek_right( lrow, b.hcenter() ) >= b.right() ) break;
492 for( urow = lrow - 1 ; urow > b.top(); --urow )
493 if( b.seek_right( urow, b.hcenter() ) < b.right() ) break;
494 lrow += noise;
495 if( lrow < row && urow > b.top() )
496 {
497 urow -= std::min( noise, ( urow - b.top() ) / 2 );
498 int uwidth = b.seek_left( urow, b.right() ) - b.seek_right( urow, b.hcenter() );
499 int lwidth = b.seek_left( lrow, b.right() ) - b.seek_right( lrow, b.hcenter() );
500 if( lrow - noise <= b.vcenter() || lwidth > uwidth + noise )
501 return 'G';
502 }
503 }
504 }
505 return 0;
506 }
507
508
509 // Common feature: U-shaped top of character
510 //
test_HKMNUuvwYy(const Rectangle & charbox) const511 int Features::test_HKMNUuvwYy( const Rectangle & charbox ) const
512 {
513 if( tp.minima( b.height() / 5 ) == 2 && tp.minima( b.height() / 4 ) == 2 &&
514 tp.minima( b.height() / 2 ) <= 3 && tp.isctip() )
515 {
516 const int noise = ( std::min( b.height(), b.width() ) / 30 ) + 1;
517 const int m5 = bp.minima( b.height() / 5 );
518 if( 2 * b.height() >= b.width() && b.height() >= 10 &&
519 ( m5 == 1 ||
520 ( m5 == 2 && Ocrad::similar( bp.iminimum(), bp.pos( 50 ), 10 ) ) ) )
521 {
522 const int stem = std::min( tp.range() + ( b.height() / 10 ), wp.pos(90) );
523 const bool maybe_Y = ( 5 * tp.range() <= 3 * b.height() ||
524 ( stem <= wp.pos(75) && 5 * wp[stem] <= b.width() ) );
525 const int lg = lp.min( lp.pos( 90 ) );
526 if( lg > 1 && bp.isvpit() && tp.minima( b.height() / 2 ) == 2 &&
527 lp[lp.pos(75)] <= lg &&
528 ( !maybe_Y || 3 * wp[stem] > b.width() || wp[stem] > wp[wp.pos(90)] + 1 ) )
529 return 'v';
530 int hdiff;
531 if( b.bottom_hook( &hdiff ) )
532 {
533 if( std::abs( hdiff ) <= b.height() / 8 )
534 {
535 if( segments_in_row( b.vpos( 30 ) ) >= 3 ) return 'v';
536 if( bp.isconvex() )
537 { if( 9 * wp[wp.pos(30)] > 10 * wp[wp.pos(50)] &&
538 9 * wp[wp.pos(50)] > 10 * wp[wp.pos(70)] ) return 'v';
539 else return 'u'; }
540 }
541 if( hdiff > b.height() / 2 )
542 { if( bp.minima( b.height() / 2 ) == 1 ) return 'y'; else return 0; }
543 }
544 const int rg = rp.min( rp.pos( 90 ) );
545 const int lg2 = lp.max( lp.pos( 70 ), lp.pos( 90 ) );
546 const int rg2 = rp.max( rp.pos( 70 ), rp.pos( 90 ) );
547 const int lc = ( lg + ( 2 * ( lp.limit() - rg ) ) ) / 3;
548 const int lc2 = ( lg2 + lp.limit() - rg2 ) / 2;
549 if( bp.ispit() && maybe_Y )
550 {
551 int row2 = b.top();
552 while( row2 < b.bottom() && segments_in_row( row2 ) != 2 ) ++row2;
553 int row1 = row2 + 1;
554 while( row1 < b.bottom() && segments_in_row( row1 ) != 1 ) ++row1;
555 if( row1 < b.bottom() ) row1 += wp[row1-b.top()] / 4;
556 if( row1 < b.bottom() && wp[row1-b.top()] < b.width() )
557 {
558 const int w1 = wp[row1-b.top()];
559 int row0 = w1 * ( row1 - row2 ) / ( b.width() - w1 ) + row1;
560 if( row0 < b.bottom() && 2 * wp[wp.pos(70)] < b.width() &&
561 ( Ocrad::similar( lg, rg, 20 ) ||
562 ( lg > 1 && lg < rg && lc >= lc2 && !rp.increasing() ) ) )
563 return 'Y';
564 }
565 }
566 if( b.escape_top( b.vpos( 60 ), b.hcenter() ) && !lp.istip() &&
567 ( 4 * b.height() >= 3 * b.width() ||
568 segments_in_col( b.hpos( 75 ) ) <= 2 ) ) return 'u';
569 if( lg < rg + 1 && !lp.increasing( lp.pos( 50 ) ) &&
570 ( 2 * lg < rg || b.vpos( 90 ) >= charbox.bottom() ) &&
571 ( tp.minima( b.height()/2 ) == 1 || lp.imaximum() > b.height()/2 ) )
572 return 'y';
573 if( lg > 1 && bp.ispit() && tp.minima( b.height() / 3 ) == 2 )
574 return 'v';
575 if( lg <= 1 && 2 * ( b.width() - rg - lg ) < b.width() &&
576 rp.increasing() && tp.minima( b.height() / 2 ) == 2 ) return 'v';
577 return 0;
578 }
579 if( 2 * b.height() >= b.width() && b.height() >= 9 &&
580 bp.minima() == 2 && bp.isctip() )
581 {
582 const int th = std::max( b.height() / 4, bp[bp.pos(50)] + noise );
583 if( bp.minima( th ) == 3 ) return 'M';
584 const int lg = lp[lp.pos(50)];
585 const int rg = rp[rp.pos(50)];
586 if( Ocrad::similar( lg, rg, 80, 2 ) &&
587 4 * lg < b.width() && 4 * rg < b.width() )
588 {
589 if( lg > 1 && rg > 1 && lp.increasing() && rp.increasing() &&
590 5 * tp[tp.pos(50)] > b.height() )
591 return 'w';
592 if( hbars() == 1 && 5 * ( hbar(0).height() - 1 ) < b.height() &&
593 hbar(0).top() >= b.vpos( 30 ) && hbar(0).bottom() <= b.vpos( 60 ) &&
594 10 * hbar(0).width() > 9 * wp[hbar(0).vcenter()-b.top()] &&
595 Ocrad::similar( v_segment( hbar(0).vcenter(), hbar(0).hcenter() ).size(),
596 hbar(0).height(), 30, 2 ) )
597 {
598 if( 9 * hbar(0).width() <= 10 * wp[wp.pos(50)] ) return 'H';
599 return 0;
600 }
601 if( segments_in_row( b.vpos( 60 ) ) == 4 ||
602 segments_in_row( b.vpos( 70 ) ) == 4 )
603 {
604 if( 2 * tp[tp.pos(50)] > b.height() ) return 'M';
605 return 'w';
606 }
607 if( ( vbars() <= 2 || ( vbars() == 3 && b.height() >= b.width() ) ) &&
608 tp.minima( b.height() / 2 ) <= 2 &&
609 tp.minima( ( 2 * b.height() ) / 5 ) <= 2 && !lp.istpit() &&
610 4 * std::abs( rp[rp.pos(20)] - rp[rp.pos(80)] ) <= b.width() )
611 {
612 const int row = b.top() + tp[tp.pos(50)];
613 if( row > b.vcenter() )
614 {
615 Rectangle r( b.left(), b.top(), b.hcenter(), b.bottom() );
616 Bitmap bm( b, r );
617 int hdiff;
618 if( bm.bottom_hook( &hdiff ) && -2 * hdiff > bm.height() ) return 'u';
619 }
620 if( row > b.vpos( 10 ) || vbars() >= 2 ) return 'N';
621 }
622 return 0;
623 }
624 if( 3 * lg < 2 * rg && lg < b.width() / 4 && rg > b.width() / 4 &&
625 rp.isctip() && tp.minima( b.height() / 8 ) == 2 ) return 'K';
626 return 0;
627 }
628 if( bp.minima() <= 2 && 2 * b.width() > 5 * b.height() ) return '~';
629 if( bp.minima() == 3 &&
630 ( hbars() == 0 || ( hbars() == 1 && hbar(0).top() >= b.vpos( 20 ) ) ) )
631 return 'M';
632 }
633 return 0;
634 }
635
636
637 // Looks for the nearest frontier in column hcenter(), then tests if
638 // gap is open downwards (except for 'x')
639 //
test_hknwx(const Rectangle & charbox) const640 int Features::test_hknwx( const Rectangle & charbox ) const
641 {
642 const int m8 = tp.minima( b.height() / 8 );
643
644 if( m8 == 2 && bp.minima( b.height() / 2 ) == 1 &&
645 ( ( lp.isctip() && rp.isctip() ) ||
646 ( lp.isconcave() && rp.isconcave() ) ) ) return 'x';
647
648 if( b.width() >= b.height() && tp.ispit() &&
649 ( b.bottom() < charbox.vcenter() || ( lp.decreasing() && rp.decreasing() ) ) )
650 return '^';
651
652 int col = 0, row = 0;
653 for( int i = bp.pos( 40 ); i <= bp.pos( 60 ); ++i )
654 if( bp[i] > row ) { row = bp[i]; col = i; }
655 row = b.bottom() - row + 1; col += b.left();
656 if( row > b.vpos( 90 ) || row <= b.top() ) return 0;
657 // FIXME follow gap up
658 { int c = col; col = b.seek_right( row, col ); if( col > c ) --col;
659 row = b.seek_top( row, col ); }
660
661 const int urow = b.seek_top( row - 1, col, false );
662 if( urow > b.vpos( 20 ) || 3 * tp[tp.pos(60)] > b.height() )
663 {
664 const int m5 = tp.minima( b.height() / 5 );
665 if( m5 == 3 && segments_in_row( b.vcenter() ) == 2 &&
666 segments_in_row( b.vpos( 80 ) ) == 3 ) return 0; // merged 'IX'
667 if( ( m5 == 2 || m5 == 3 ) && tp.minima() >= 2 &&
668 rp[rp.pos(25)] <= b.width() / 4 &&
669 ( !lp.istpit() || rp.minima() == 1 ) ) return 'w';
670 if( m5 == 1 && m8 == 1 && 4 * tp.max( tp.pos(40), tp.pos(60) ) < 3 * b.height() )
671 { if( rp.isctip( 66 ) ) return 'k'; else return 'h'; }
672 return 0;
673 }
674 if( Ocrad::similar( b.height(), b.width(), 40 ) && row > b.vcenter() &&
675 urow < b.vcenter() && tp.minima( b.height() / 5 ) == 2 &&
676 bp.minima( urow + 1 ) == 3 )
677 return 'w';
678 if( urow <= b.vpos( 20 ) && tp.minima( b.height() / 4 ) == 1 &&
679 Ocrad::similar( b.height(), b.width(), 40 ) &&
680 ( 8 * ( rp[rp.pos(50)] - 1 ) <= b.width() ||
681 tp[tp.pos(99)] > b.height() / 2 ) )
682 return 'n';
683 return 0;
684 }
685
686
// Looks for four black sections in column hcenter() ± 1, then tests if
// upper gap is open to the right and lower gaps are open to the left
//
test_s_cedilla() const690 int Features::test_s_cedilla() const
691 {
692 int urow2 = 0, urow3 = 0, urow4 = 0, col, black_section = 0;
693
694 for( col = b.hcenter() - 1; col <= b.hcenter() + 1; ++col )
695 {
696 bool prev_black = false;
697 for( int row = b.top(); row <= b.bottom(); ++row )
698 {
699 bool black = b.get_bit( row, col );
700 if( black && !prev_black )
701 {
702 if( ++black_section == 2 ) urow2 = row - 1;
703 else if( black_section == 3 ) urow3 = row - 1;
704 else if( black_section == 4 ) urow4 = row - 1;
705 }
706 prev_black = black;
707 }
708 if( black_section == 4 && urow2 < b.vpos( 50 ) && urow4 >= b.vpos( 70 ) )
709 break;
710 black_section = 0;
711 }
712
713 if( black_section == 4 && b.escape_right( urow2, col ) &&
714 b.escape_left( urow3, col ) && b.escape_left( urow4, col ) )
715 return UCS::SSCEDI;
716 return 0;
717 }
718
719
test_comma() const720 bool Features::test_comma() const
721 {
722 if( b.holes() || b.height() <= b.width() || b.height() > 3 * b.width() )
723 return false;
724
725 if( b.width() >= 3 && b.height() >= 3 )
726 {
727 int upper_area = 0;
728 for( int row = b.top(); row < b.top() + b.width(); ++row )
729 for( int col = b.left(); col <= b.right(); ++col )
730 if( b.get_bit( row, col ) ) ++upper_area;
731 if( upper_area < (b.width() - 2) * (b.width() - 2) ) return false;
732 int count1 = 0, count2 = 0;
733 for( int col = b.left(); col <= b.right(); ++col )
734 { if( b.get_bit( b.top() + 1, col ) ) ++count1;
735 if( b.get_bit( b.bottom() - 1, col ) ) ++count2; }
736 if( count1 <= count2 ) return false;
737 }
738 return true;
739 }
740
741
test_easy(const Rectangle & charbox) const742 int Features::test_easy( const Rectangle & charbox ) const
743 {
744 int code = test_solid( charbox );
745 if( code ) return code;
746
747 if( b.top() >= charbox.vcenter() && test_comma() ) return ',';
748 if( b.bottom() <= charbox.vcenter() &&
749 b.height() > b.width() && bp.minima() == 1 )
750 {
751 if( tp.iminimum() < tp.pos( 50 ) && bp.iminimum() > bp.pos( 50 ) )
752 return '`'; else return '\'';
753 }
754 if( 2 * b.height() > 3 * wp.max() && b.top() >= charbox.vcenter() &&
755 bp.minima() == 1 ) return ',';
756 return 0;
757 }
758
759
760 // Recognizes single line, non-rectangular characters without holes.
761 // '/<>C[\^`c
762 //
test_line(const Rectangle & charbox) const763 int Features::test_line( const Rectangle & charbox ) const
764 {
765 const int vnoise = ( b.height() / 30 ) + 1;
766 const int topmax = b.top() + vnoise;
767 const int botmin = b.bottom() - vnoise;
768 const bool vbar_left = ( vbars() == 1 && vbar(0).width() >= 2 &&
769 vbar(0).left() <= b.hpos( 10 ) + 1 );
770 if( tp.minima() == 1 && bp.minima() == 1 && rp.istip() )
771 {
772 if( vbar_left && b.height() > 2 * b.width() &&
773 2 * rp[rp.pos(50)] > b.width() )
774 {
775 int row = b.seek_top( b.vcenter(), b.hcenter() );
776 int col = b.seek_right( row, b.hcenter() );
777 if( col < b.right() )
778 {
779 row = b.seek_bottom( b.vcenter(), b.hcenter() );
780 col = b.seek_right( row, b.hcenter() );
781 if( col < b.right() ) return 'C';
782 }
783 }
784 if( hbars() == 2 &&
785 hbar(0).top() <= topmax && 4 * hbar(0).height() <= b.height() &&
786 hbar(1).bottom() >= botmin && 4 * hbar(1).height() <= b.height() )
787 { if( vbar_left && b.height() > 2 * b.width() ) return '[';
788 if( vbar_left || lp.ispit() ) return 'c'; }
789 }
790
791 int slope1, slope2;
792
793 if( tp.minima() != 1 ) return 0;
794 if( lp.minima() == 1 && rp.minima() == 1 && 2 * b.height() >= b.width() &&
795 lp.straight( &slope1 ) && rp.straight( &slope2 ) )
796 {
797 if( slope1 < 0 && slope2 < 0 && bp.minima() == 2 ) return '^';
798 if( bp.minima() != 1 ) return 0;
799 if( slope1 < 0 && slope2 > 0 )
800 {
801 if( b.v_includes( charbox.vcenter() ) )
802 {
803 if( 10 * b.area() < 3 * b.size() ) return '/';
804 if( b.height() > 2 * b.width() ) return 'l';
805 return 0;
806 }
807 if( b.top() >= charbox.vcenter() ) return ',';
808 return '\'';
809 }
810 if( slope1 > 0 && slope2 < 0 )
811 {
812 if( b.bottom() > charbox.vcenter() )
813 {
814 if( ( 3 * b.width() > b.height() && b.height() > charbox.height() ) ||
815 2 * b.width() >= b.height() ) return '\\';
816 else return 0;
817 }
818 return '`';
819 }
820 return 0;
821 }
822
823 if( bp.minima() == 1 && 2 * b.width() >= b.height() &&
824 tp.straight( &slope1 ) && bp.straight( &slope2 ) )
825 {
826 if( lp.minima() == 1 && rp.minima() == 1 )
827 {
828 if( slope1 < 0 && slope2 > 0 )
829 {
830 if( b.v_includes( charbox.vcenter() ) ) return '/';
831 if( b.top() >= charbox.vcenter() ) return ',';
832 return '\'';
833 }
834 if( slope1 > 0 && slope2 < 0 )
835 {
836 if( b.bottom() > charbox.vcenter() ) return '\\';
837 return '`';
838 }
839 }
840 else if( 2 * b.width() >= b.height() )
841 {
842 if( slope1 < 0 && slope2 < 0 && lp.minima() == 1 && rp.minima() == 2 )
843 return '<';
844 if( slope1 > 0 && slope2 > 0 && lp.minima() == 2 && rp.minima() == 1 )
845 return '>';
846 }
847 }
848 return 0;
849 }
850
851
test_solid(const Rectangle & charbox) const852 int Features::test_solid( const Rectangle & charbox ) const
853 {
854 if( b.holes() ) return 0;
855
856 if( b.height() >= 5 && b.width() >= 5 )
857 {
858 if( 2 * b.height() > b.width() && ( tp.minima() != 1 || bp.minima() != 1 ) )
859 return 0;
860 if( b.height() < 2 * b.width() && ( lp.minima() != 1 || rp.minima() != 1 ) )
861 return 0;
862 }
863
864 int inner_area, inner_size, porosity = 0;
865
866 if( b.width() >= 3 && b.height() >= 3 )
867 {
868 const int vnoise = ( b.height() / 100 ) + 1;
869 inner_size = ( b.width() - 2 ) * ( b.height() - 2 );
870 inner_area = 0;
871 for( int row = b.top() + vnoise; row <= b.bottom() - vnoise; ++row )
872 {
873 int holes = 0; // FIXME
874 for( int col = b.left() + 1; col < b.right(); ++col )
875 { if( b.get_bit( row, col ) ) ++inner_area; else ++holes; }
876 if( 5 * holes >= b.width() ) porosity += ( 5 * holes ) / b.width();
877 }
878 if( inner_area * 100 < inner_size * 70 ) return 0;
879 }
880 else { inner_size = 0; inner_area = b.area(); }
881
882 if( Ocrad::similar( b.height(), wp.max(), 20, 2 ) )
883 {
884 const int n = std::min( b.height(), b.width() );
885 if( n >= 6 )
886 {
887 int d = 0;
888 for( int i = 0; i < n; ++i )
889 {
890 if( b.get_bit( b.top() + i, b.left() + i ) ) ++d;
891 if( b.get_bit( b.top() + i, b.right() - i ) ) --d;
892 }
893 if( 2 * std::abs( d ) >= n - 1 ) return 0;
894 }
895 if( ( !porosity && inner_area * 100 >= inner_size * 75 ) ||
896 ( b.width() >= 7 && b.height() >= 7 &&
897 ( 100 * b.area_octagon() >= 95 * b.size_octagon() ||
898 100 * b.area_octagon() >= 95 * b.area() ) ) ) return '.';
899 return 0;
900 }
901 if( porosity > 1 || inner_area * 100 < inner_size * 85 ||
902 ( porosity && inner_area * 100 < inner_size * 95 ) ) return 0;
903 if( b.width() > b.height() )
904 {
905 if( b.top() > charbox.vpos( 90 ) ||
906 ( charbox.bottom() - b.bottom() < b.top() - charbox.vcenter() &&
907 b.width() >= 5 * b.height() ) ) return '_';
908 return '-';
909 }
910
911 if( b.height() > b.width() )
912 {
913 if( b.top() > charbox.vcenter() ) return ',';
914 if( b.bottom() <= charbox.vcenter() ) return '\'';
915 return '|';
916 }
917 return 0;
918 }
919