1 /* GNU Ocrad - Optical Character Recognition program
2 Copyright (C) 2003-2019 Antonio Diaz Diaz.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <algorithm>
19 #include <cstdio>
20 #include <cstdlib>
21 #include <vector>
22 #include <stdint.h>
23
24 #include "common.h"
25 #include "rectangle.h"
26 #include "segment.h"
27 #include "ucs.h"
28 #include "bitmap.h"
29 #include "blob.h"
30 #include "profile.h"
31 #include "feats.h"
32
33
34 // Tests if the lower half of character is open to the left, to the right,
35 // and/or to the bottom
36 //
test_49ARegpq(const Rectangle & charbox) const37 int Features::test_49ARegpq( const Rectangle & charbox ) const
38 {
39 const Bitmap & h = b.hole( 0 );
40
41 if( bp.minima( b.height() / 10 + 1 ) == 2 && bp.isctip() && tp.minima() == 1 )
42 {
43 if( tp.isvpit() || rp.decreasing() ||
44 ( rp.decreasing( 1, rp.pos( 20 ) ) && lp.decreasing( 1, lp.pos( 20 ) ) ) )
45 return 'A';
46 if( hbars() == 2 && hbar(1).width() >= b.width() )
47 {
48 const int i = hbar(1).top() - b.top();
49 const int j = hbar(1).bottom() - b.top();
50 if( rp.area( i, j ) <= lp.area( i, j ) ) return 'A';
51 }
52 return 'R';
53 }
54
55 int col = h.hcenter();
56 int row = b.seek_bottom( h.bottom(), col, false ) + 1;
57 if( row >= b.vpos( 90 ) )
58 { col = h.left(); row = b.seek_bottom( h.bottom(), col, false ) + 1; }
59 if( row >= b.bottom() ) return 0;
60
61 if( b.escape_right( row, col ) )
62 {
63 if( ( lp.ispit() && b.seek_bottom( row, h.right() ) < b.bottom() ) ||
64 ( lp.isconvex() && b.seek_bottom( row, h.hcenter() ) < b.bottom() ) )
65 return 'e';
66 if( bp.ispit() )
67 {
68 int row2 = b.seek_bottom( row, h.right() );
69 if( row2 < b.vpos( 75 ) ) return 'g';
70 if( row2 < b.bottom() ) return 'e';
71 }
72 return 'p';
73 }
74
75 else if( b.escape_left( row, col ) )
76 {
77 Profile hlp( h, Profile::left );
78 Profile htp( h, Profile::top );
79 Profile hwp( h, Profile::width );
80 if( vbars() == 1 && vbar(0).hcenter() > b.hcenter() &&
81 hlp.decreasing() && htp.decreasing() &&
82 hwp[hwp.pos(30)] < hwp[hwp.pos(70)] )
83 return '4';
84 if( rp.ispit() && rp.minima() == 1 && rp.iminimum() < rp.pos( 70 ) &&
85 tp.ispit() && charbox.bottom() > b.vpos( rp.isconvex() ? 80 : 90 ) )
86 return '9';
87 int hdiff;
88 if( b.bottom_hook( &hdiff ) && hdiff > 0 )
89 {
90 if( h.bottom() < b.vcenter() && h.right() + 2 <= b.right() &&
91 ( !b.get_bit( h.bottom() + 1, h.right() + 1 ) ||
92 !b.get_bit( h.bottom() + 1, h.right() + 2 ) || rp.isctip() ) )
93 return 's';
94 else return 'g';
95 }
96 if( row > b.vpos( 85 ) && tp.ispit() ) return 'Q';
97 int row2 = b.seek_bottom( row, col );
98 if( row2 < b.bottom() &&
99 rp.increasing( ( ( row + ( 2 * row2 ) ) / 3 ) - b.top() ) )
100 return 'g';
101 if( bp.minima() == 1 )
102 {
103 if( h.height() >= charbox.height() ) return 'Q';
104 if( h.right() < b.hcenter() && h.bottom() < b.vcenter() ) return '2';
105 return 'q';
106 }
107 }
108 return 0;
109 }
110
111
test_4ADQao(const Charset & charset,const Rectangle & charbox) const112 int Features::test_4ADQao( const Charset & charset, const Rectangle & charbox ) const
113 {
114 const Bitmap & h = b.hole( 0 );
115 int left_delta = h.left() - b.left(), right_delta = b.right() - h.right();
116
117 if( !lp.ispit() && lp.isflats() && rp.ispit() ) return 'D';
118 if( Ocrad::similar( left_delta, right_delta, 40 ) &&
119 tp.minima() == 2 && bp.minima() == 2 && !rp.isconvex() ) return '#';
120 if( tp.minima() == 1 && bp.minima() == 1 )
121 {
122 int row = b.seek_bottom( h.bottom(), h.hcenter(), false );
123 if( charset.enabled( Charset::iso_8859_15 ) ||
124 charset.enabled( Charset::iso_8859_9 ) )
125 if( !lp.isconvex() && bp.isconvex() && !rp.isconvex() &&
126 b.seek_bottom( row, h.hcenter() ) < b.bottom() )
127 return UCS::SEACUTE;
128 row = ( row + b.seek_bottom( row, h.hcenter() ) ) / 2;
129 if( row < b.bottom() - 1 && !lp.isflats() &&
130 b.seek_left( row, h.hcenter() ) <= b.left() )
131 {
132 if( ( 2 * h.height() <= b.height() || 2 * h.width() <= b.width() ) &&
133 wp[h.top()-b.top()] < wp[h.bottom()-b.top()] ) return '4';
134 if( !rp.ispit() && !rp.isconvex() ) return 'Q';
135 }
136 }
137 if( 2 * b.width() > 5 * h.width() && !rp.isconvex() )
138 {
139 const int c = segments_in_row( h.vcenter() );
140 const int m = bp.minima();
141 if( c == 3 && h.top() < b.vcenter() && h.bottom() > b.vcenter() &&
142 3 * h.height() >= b.height() && ( m == 3 || m == 2 ) && !lp.ispit() )
143 return 'm';
144 if( c == 3 && left_delta > right_delta && lp.ispit() &&
145 segments_in_col( h.hcenter() ) == 4 )
146 return '@';
147 if( c == 4 && Ocrad::similar( left_delta, right_delta, 40 ) && lp.ispit() )
148 return '@';
149 }
150 if( tp.minima() == 1 && bp.istip() && !rp.isctip( 66 ) ) return 'A';
151 if( Ocrad::similar( left_delta, right_delta, 50 ) )
152 {
153 if( bp.minima() == 1 && rp.isconvex() && b.test_BD() ) return 'D';
154 if( bp.minima() > 1 || rp.minima() > 1 || b.test_Q() )
155 { if( 4 * h.size() >= b.size() || tp.ispit() || lp.ispit() ) return 'Q';
156 else return 0; }
157 if( 3 * bp[bp.pos(100)] < b.height() && 5 * rp[rp.pos(55)] >= b.width() )
158 return 'a';
159 if( lp.istip() ) return 'n';
160 if( b.vpos( 80 ) < charbox.vcenter() ) return UCS::DEG;
161 return 'o';
162 }
163 if( left_delta > right_delta && rp.ispit() &&
164 tp.minima() == 1 && bp.minima() == 1 ) return 'D';
165 if( Ocrad::similar( left_delta, right_delta, 50 ) &&
166 ( bp.minima() > 1 || rp.minima() > 1 ) ) return 'a';
167 return 0;
168 }
169
170
171 // Tests if the upper half of character is open to the left, to the right,
172 // and/or to the bottom
173 //
test_6abd(const Charset & charset) const174 int Features::test_6abd( const Charset & charset ) const
175 {
176 const Bitmap & h = b.hole( 0 );
177
178 if( 3 * h.width() < b.width() &&
179 ( bp.minima( b.height() / 4 ) != 1 || tp.minima( h.vcenter() - b.top() ) != 1 ) ) return 0;
180
181 int col = h.hcenter();
182 int row = b.seek_top( h.top(), col, false ) - 1;
183 if( row <= b.top() )
184 {
185 col = h.right(); if( b.right() - h.right() > h.width() ) ++col;
186 row = b.seek_top( h.top(), col, false ) - 1;
187 }
188 if( row <= b.top() ) return 0;
189 const int rcol = ( b.right() + h.right() ) / 2;
190 const int urow = h.top() - ( b.bottom() - h.bottom() );
191 const bool oacute1 = ( ( b.seek_right( urow - 1, h.right() ) >= b.right() ) ||
192 ( b.seek_right( row, col ) >= b.right() ) );
193
194 if( b.escape_right( row, col ) )
195 {
196 const int noise = ( b.width() / 30 ) + 1;
197 const int c = lp[urow-b.top()];
198 const bool oacute2 = ( c > lp[h.top()-b.top()] + noise &&
199 urow <= b.top() + tp[std::min( c - 1, b.width() / 4 )] );
200 if( ( oacute1 && oacute2 ) && ( charset.enabled( Charset::iso_8859_15 ) ||
201 charset.enabled( Charset::iso_8859_9 ) ) )
202 {
203 const bool oacute3 = ( b.right() - rp[rp.pos(5)] >= h.right() ||
204 b.left() + lp[h.top()-b.top()] <= b.hpos( 5 ) );
205 if( oacute3 ) return UCS::SOACUTE;
206 }
207 if( !oacute2 && lp.ispit() && bp.ispit() )
208 {
209 int row2 = b.seek_top( h.top(), h.right() + 1, false ) - 1;
210 row2 = b.seek_top( row2, h.right() + 1 );
211 if( row2 > b.top() ) return '6';
212 }
213 int row2 = b.seek_top( h.top(), rcol, false ) - 1;
214 row2 = b.seek_top( row2, rcol );
215 if( row2 <= b.top() ) return 'b';
216 const int m = tp.minima( b.height() / 2 );
217 if( m == 1 && bp.minima() == 1 ) return 's';
218 if( m == 2 ) return 'k'; else return 0;
219 }
220
221 if( b.escape_left( row, col ) )
222 {
223 const int col2 = std::max( h.left(), h.hpos( 10 ) );
224 int row2 = b.seek_top( h.top(), col2, false ) - 1;
225 row2 = b.seek_top( row2, col2 );
226 if( row2 > b.top() )
227 {
228 if( charset.enabled( Charset::iso_8859_15 ) ||
229 charset.enabled( Charset::iso_8859_9 ) )
230 {
231 int row3 = b.seek_top( row, col );
232 if( row > b.vcenter() && row3 > b.vpos( 20 ) ) return UCS::SAACUTE;
233 if( oacute1 ) return UCS::SOGRAVE;
234 }
235 return 'a';
236 }
237 if( charset.enabled( Charset::iso_8859_15 ) ||
238 charset.enabled( Charset::iso_8859_9 ) )
239 if( oacute1 ) return UCS::SOACUTE;
240 return 'd';
241 }
242
243 if( b.width() > 3 * h.width() && h.top() < b.vcenter() &&
244 segments_in_row( b.vcenter() ) == 3 && !lp.isconvex() ) return 'm';
245 int hdiff; if( b.top_hook( &hdiff ) && hdiff > 0 ) return 's';
246 return 0;
247 }
248