1 /*  GNU Ocrad - Optical Character Recognition program
2     Copyright (C) 2003-2019 Antonio Diaz Diaz.
3 
4     This program is free software: you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation, either version 2 of the License, or
7     (at your option) any later version.
8 
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13 
14     You should have received a copy of the GNU General Public License
15     along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17 
18 #include <algorithm>
19 #include <cstdio>
20 #include <cstdlib>
21 #include <vector>
22 #include <stdint.h>
23 
24 #include "common.h"
25 #include "rectangle.h"
26 #include "segment.h"
27 #include "ucs.h"
28 #include "bitmap.h"
29 #include "blob.h"
30 #include "profile.h"
31 #include "feats.h"
32 
33 
34 // Tests if the lower half of character is open to the left, to the right,
35 // and/or to the bottom
36 //
test_49ARegpq(const Rectangle & charbox) const37 int Features::test_49ARegpq( const Rectangle & charbox ) const
38   {
39   const Bitmap & h = b.hole( 0 );
40 
41   if( bp.minima( b.height() / 10 + 1 ) == 2 && bp.isctip() && tp.minima() == 1 )
42     {
43     if( tp.isvpit() || rp.decreasing() ||
44         ( rp.decreasing( 1, rp.pos( 20 ) ) && lp.decreasing( 1, lp.pos( 20 ) ) ) )
45       return 'A';
46     if( hbars() == 2 && hbar(1).width() >= b.width() )
47       {
48       const int i = hbar(1).top() - b.top();
49       const int j = hbar(1).bottom() - b.top();
50       if( rp.area( i, j ) <= lp.area( i, j ) ) return 'A';
51       }
52     return 'R';
53     }
54 
55   int col = h.hcenter();
56   int row = b.seek_bottom( h.bottom(), col, false ) + 1;
57   if( row >= b.vpos( 90 ) )
58     { col = h.left(); row = b.seek_bottom( h.bottom(), col, false ) + 1; }
59   if( row >= b.bottom() ) return 0;
60 
61   if( b.escape_right( row, col ) )
62     {
63     if( ( lp.ispit() && b.seek_bottom( row, h.right() ) < b.bottom() ) ||
64         ( lp.isconvex() && b.seek_bottom( row, h.hcenter() ) < b.bottom() ) )
65       return 'e';
66     if( bp.ispit() )
67       {
68       int row2 = b.seek_bottom( row, h.right() );
69       if( row2 < b.vpos( 75 ) ) return 'g';
70       if( row2 < b.bottom() ) return 'e';
71       }
72     return 'p';
73     }
74 
75   else if( b.escape_left( row, col ) )
76     {
77     Profile hlp( h, Profile::left );
78     Profile htp( h, Profile::top );
79     Profile hwp( h, Profile::width );
80     if( vbars() == 1 && vbar(0).hcenter() > b.hcenter() &&
81         hlp.decreasing() && htp.decreasing() &&
82         hwp[hwp.pos(30)] < hwp[hwp.pos(70)] )
83       return '4';
84     if( rp.ispit() && rp.minima() == 1 && rp.iminimum() < rp.pos( 70 ) &&
85         tp.ispit() && charbox.bottom() > b.vpos( rp.isconvex() ? 80 : 90 ) )
86       return '9';
87     int hdiff;
88     if( b.bottom_hook( &hdiff ) && hdiff > 0 )
89       {
90       if( h.bottom() < b.vcenter() && h.right() + 2 <= b.right() &&
91           ( !b.get_bit( h.bottom() + 1, h.right() + 1 ) ||
92             !b.get_bit( h.bottom() + 1, h.right() + 2 ) || rp.isctip() ) )
93         return 's';
94       else return 'g';
95       }
96     if( row > b.vpos( 85 ) && tp.ispit() ) return 'Q';
97     int row2 = b.seek_bottom( row, col );
98     if( row2 < b.bottom() &&
99         rp.increasing( ( ( row + ( 2 * row2 ) ) / 3 ) - b.top() ) )
100       return 'g';
101     if( bp.minima() == 1 )
102       {
103       if( h.height() >= charbox.height() ) return 'Q';
104       if( h.right() < b.hcenter() && h.bottom() < b.vcenter() ) return '2';
105       return 'q';
106       }
107     }
108   return 0;
109   }
110 
111 
test_4ADQao(const Charset & charset,const Rectangle & charbox) const112 int Features::test_4ADQao( const Charset & charset, const Rectangle & charbox ) const
113   {
114   const Bitmap & h = b.hole( 0 );
115   int left_delta = h.left() - b.left(), right_delta = b.right() - h.right();
116 
117   if( !lp.ispit() && lp.isflats() && rp.ispit() ) return 'D';
118   if( Ocrad::similar( left_delta, right_delta, 40 ) &&
119       tp.minima() == 2 && bp.minima() == 2 && !rp.isconvex() ) return '#';
120   if( tp.minima() == 1 && bp.minima() == 1 )
121     {
122     int row = b.seek_bottom( h.bottom(), h.hcenter(), false );
123     if( charset.enabled( Charset::iso_8859_15 ) ||
124         charset.enabled( Charset::iso_8859_9 ) )
125       if( !lp.isconvex() && bp.isconvex() && !rp.isconvex() &&
126           b.seek_bottom( row, h.hcenter() ) < b.bottom() )
127         return UCS::SEACUTE;
128     row = ( row + b.seek_bottom( row, h.hcenter() ) ) / 2;
129     if( row < b.bottom() - 1 && !lp.isflats() &&
130         b.seek_left( row, h.hcenter() ) <= b.left() )
131       {
132       if( ( 2 * h.height() <= b.height() || 2 * h.width() <= b.width() ) &&
133           wp[h.top()-b.top()] < wp[h.bottom()-b.top()] ) return '4';
134       if( !rp.ispit() && !rp.isconvex() ) return 'Q';
135       }
136     }
137   if( 2 * b.width() > 5 * h.width() && !rp.isconvex() )
138     {
139     const int c = segments_in_row( h.vcenter() );
140     const int m = bp.minima();
141     if( c == 3 && h.top() < b.vcenter() && h.bottom() > b.vcenter() &&
142         3 * h.height() >= b.height() && ( m == 3 || m == 2 ) && !lp.ispit() )
143       return 'm';
144     if( c == 3 && left_delta > right_delta && lp.ispit() &&
145         segments_in_col( h.hcenter() ) == 4 )
146       return '@';
147     if( c == 4 && Ocrad::similar( left_delta, right_delta, 40 ) && lp.ispit() )
148       return '@';
149     }
150   if( tp.minima() == 1 && bp.istip() && !rp.isctip( 66 ) ) return 'A';
151   if( Ocrad::similar( left_delta, right_delta, 50 ) )
152     {
153     if( bp.minima() == 1 && rp.isconvex() && b.test_BD() ) return 'D';
154     if( bp.minima() > 1 || rp.minima() > 1 || b.test_Q() )
155       { if( 4 * h.size() >= b.size() || tp.ispit() || lp.ispit() ) return 'Q';
156       else return 0; }
157     if( 3 * bp[bp.pos(100)] < b.height() && 5 * rp[rp.pos(55)] >= b.width() )
158       return 'a';
159     if( lp.istip() ) return 'n';
160     if( b.vpos( 80 ) < charbox.vcenter() ) return UCS::DEG;
161     return 'o';
162     }
163   if( left_delta > right_delta && rp.ispit() &&
164       tp.minima() == 1 && bp.minima() == 1 ) return 'D';
165   if( Ocrad::similar( left_delta, right_delta, 50 ) &&
166       ( bp.minima() > 1 || rp.minima() > 1 ) ) return 'a';
167   return 0;
168   }
169 
170 
171 // Tests if the upper half of character is open to the left, to the right,
172 // and/or to the bottom
173 //
test_6abd(const Charset & charset) const174 int Features::test_6abd( const Charset & charset ) const
175   {
176   const Bitmap & h = b.hole( 0 );
177 
178   if( 3 * h.width() < b.width() &&
179       ( bp.minima( b.height() / 4 ) != 1 || tp.minima( h.vcenter() - b.top() ) != 1 ) ) return 0;
180 
181   int col = h.hcenter();
182   int row = b.seek_top( h.top(), col, false ) - 1;
183   if( row <= b.top() )
184     {
185     col = h.right(); if( b.right() - h.right() > h.width() ) ++col;
186     row = b.seek_top( h.top(), col, false ) - 1;
187     }
188   if( row <= b.top() ) return 0;
189   const int rcol = ( b.right() + h.right() ) / 2;
190   const int urow = h.top() - ( b.bottom() - h.bottom() );
191   const bool oacute1 = ( ( b.seek_right( urow - 1, h.right() ) >= b.right() ) ||
192                          ( b.seek_right( row, col ) >= b.right() ) );
193 
194   if( b.escape_right( row, col ) )
195     {
196     const int noise = ( b.width() / 30 ) + 1;
197     const int c = lp[urow-b.top()];
198     const bool oacute2 = ( c > lp[h.top()-b.top()] + noise &&
199                            urow <= b.top() + tp[std::min( c - 1, b.width() / 4 )] );
200     if( ( oacute1 && oacute2 ) && ( charset.enabled( Charset::iso_8859_15 ) ||
201                                     charset.enabled( Charset::iso_8859_9 ) ) )
202       {
203       const bool oacute3 = ( b.right() - rp[rp.pos(5)] >= h.right() ||
204                              b.left() + lp[h.top()-b.top()] <= b.hpos( 5 ) );
205       if( oacute3 ) return UCS::SOACUTE;
206       }
207     if( !oacute2 && lp.ispit() && bp.ispit() )
208       {
209       int row2 = b.seek_top( h.top(), h.right() + 1, false ) - 1;
210       row2 = b.seek_top( row2, h.right() + 1 );
211       if( row2 > b.top() ) return '6';
212       }
213     int row2 = b.seek_top( h.top(), rcol, false ) - 1;
214     row2 = b.seek_top( row2, rcol );
215     if( row2 <= b.top() ) return 'b';
216     const int m = tp.minima( b.height() / 2 );
217     if( m == 1 && bp.minima() == 1 ) return 's';
218     if( m == 2 ) return 'k'; else return 0;
219     }
220 
221   if( b.escape_left( row, col ) )
222     {
223     const int col2 = std::max( h.left(), h.hpos( 10 ) );
224     int row2 = b.seek_top( h.top(), col2, false ) - 1;
225     row2 = b.seek_top( row2, col2 );
226     if( row2 > b.top() )
227       {
228       if( charset.enabled( Charset::iso_8859_15 ) ||
229           charset.enabled( Charset::iso_8859_9 ) )
230         {
231         int row3 = b.seek_top( row, col );
232         if( row > b.vcenter() && row3 > b.vpos( 20 ) ) return UCS::SAACUTE;
233         if( oacute1 ) return UCS::SOGRAVE;
234         }
235       return 'a';
236       }
237     if( charset.enabled( Charset::iso_8859_15 ) ||
238         charset.enabled( Charset::iso_8859_9 ) )
239       if( oacute1 ) return UCS::SOACUTE;
240     return 'd';
241     }
242 
243   if( b.width() > 3 * h.width() && h.top() < b.vcenter() &&
244       segments_in_row( b.vcenter() ) == 3 && !lp.isconvex() ) return 'm';
245   int hdiff; if( b.top_hook( &hdiff ) && hdiff > 0 ) return 's';
246   return 0;
247   }
248