1 /**********************************************************************
2 * File: blobbox.cpp (Formerly blobnbox.c)
3 * Description: Code for the textord blob class.
4 * Author: Ray Smith
5 *
6 * (C) Copyright 1992, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19 // Include automatically generated configuration file if running autoconf.
20 #ifdef HAVE_CONFIG_H
21 # include "config_auto.h"
22 #endif
23
24 #include "blobbox.h"
25 #include "blobs.h" // for TPOINT
26 #include "coutln.h" // for C_OUTLINE_IT, C_OUTLINE, C_OUTLINE_LIST
27 #include "environ.h" // for l_uint32
28 #include "host.h" // for NearlyEqual
29 #include "points.h" // for operator+=, ICOORD::rotate
30
31 #include "helpers.h" // for UpdateRange, IntCastRounded
32
33 #include <allheaders.h> // for pixGetHeight, pixGetPixel
34
35 #include <algorithm> // for max, min
36 #include <cmath>
37 #include <cstdint> // for INT32_MAX, INT16_MAX
38
39 #define PROJECTION_MARGIN 10 // arbitrary
40
41 namespace tesseract {
42
43 // Up to 30 degrees is allowed for rotations of diacritic blobs.
44 const double kCosSmallAngle = 0.866;
45 // Min aspect ratio for a joined word to indicate an obvious flow direction.
46 const double kDefiniteAspectRatio = 2.0;
47 // Multiple of short length in perimeter to make a joined word.
48 const double kComplexShapePerimeterRatio = 1.5;
49 // Min multiple of linesize for medium-sized blobs in ReFilterBlobs.
50 const double kMinMediumSizeRatio = 0.25;
51 // Max multiple of linesize for medium-sized blobs in ReFilterBlobs.
52 const double kMaxMediumSizeRatio = 4.0;
53
54 // Rotates the box and the underlying blob.
rotate(FCOORD rotation)55 void BLOBNBOX::rotate(FCOORD rotation) {
56 cblob_ptr->rotate(rotation);
57 rotate_box(rotation);
58 compute_bounding_box();
59 }
60
61 // Reflect the box in the y-axis, leaving the underlying blob untouched.
reflect_box_in_y_axis()62 void BLOBNBOX::reflect_box_in_y_axis() {
63 int left = -box.right();
64 box.set_right(-box.left());
65 box.set_left(left);
66 }
67
68 // Rotates the box by the angle given by rotation.
69 // If the blob is a diacritic, then only small rotations for skew
70 // correction can be applied.
rotate_box(FCOORD rotation)71 void BLOBNBOX::rotate_box(FCOORD rotation) {
72 if (IsDiacritic()) {
73 ASSERT_HOST(rotation.x() >= kCosSmallAngle);
74 ICOORD top_pt((box.left() + box.right()) / 2, base_char_top_);
75 ICOORD bottom_pt(top_pt.x(), base_char_bottom_);
76 top_pt.rotate(rotation);
77 base_char_top_ = top_pt.y();
78 bottom_pt.rotate(rotation);
79 base_char_bottom_ = bottom_pt.y();
80 box.rotate(rotation);
81 } else {
82 box.rotate(rotation);
83 set_diacritic_box(box);
84 }
85 }
86
87 /**********************************************************************
88 * BLOBNBOX::merge
89 *
90 * Merge this blob with the given blob, which should be after this.
91 **********************************************************************/
merge(BLOBNBOX * nextblob)92 void BLOBNBOX::merge( // merge blobs
93 BLOBNBOX *nextblob // blob to join with
94 ) {
95 box += nextblob->box; // merge boxes
96 set_diacritic_box(box);
97 nextblob->joined = true;
98 }
99
100 // Merge this with other, taking the outlines from other.
101 // Other is not deleted, but left for the caller to handle.
really_merge(BLOBNBOX * other)102 void BLOBNBOX::really_merge(BLOBNBOX *other) {
103 if (other->cblob_ptr != nullptr) {
104 C_OUTLINE_IT ol_it(cblob_ptr->out_list());
105 ol_it.add_list_after(other->cblob_ptr->out_list());
106 }
107 compute_bounding_box();
108 }
109
110 /**********************************************************************
111 * BLOBNBOX::chop
112 *
113 * Chop this blob into equal sized pieces using the x height as a guide.
114 * The blob is not actually chopped. Instead, fake blobs are inserted
115 * with the relevant bounding boxes.
116 **********************************************************************/
117
chop(BLOBNBOX_IT * start_it,BLOBNBOX_IT * end_it,FCOORD rotation,float xheight)118 void BLOBNBOX::chop( // chop blobs
119 BLOBNBOX_IT *start_it, // location of this
120 BLOBNBOX_IT *end_it, // iterator
121 FCOORD rotation, // for landscape
122 float xheight // of line
123 ) {
124 int16_t blobcount; // no of blobs
125 BLOBNBOX *newblob; // fake blob
126 BLOBNBOX *blob; // current blob
127 int16_t blobindex; // number of chop
128 int16_t leftx; // left edge of blob
129 float blobwidth; // width of each
130 float rightx; // right edge to scan
131 float ymin, ymax; // limits of new blob
132 float test_ymin, test_ymax; // limits of part blob
133 ICOORD bl, tr; // corners of box
134 BLOBNBOX_IT blob_it; // blob iterator
135
136 // get no of chops
137 blobcount = static_cast<int16_t>(std::floor(box.width() / xheight));
138 if (blobcount > 1 && cblob_ptr != nullptr) {
139 // width of each
140 blobwidth = static_cast<float>(box.width() + 1) / blobcount;
141 for (blobindex = blobcount - 1, rightx = box.right(); blobindex >= 0;
142 blobindex--, rightx -= blobwidth) {
143 ymin = static_cast<float>(INT32_MAX);
144 ymax = static_cast<float>(-INT32_MAX);
145 blob_it = *start_it;
146 do {
147 blob = blob_it.data();
148 find_cblob_vlimits(blob->cblob_ptr, rightx - blobwidth, rightx,
149 /*rotation, */ test_ymin, test_ymax);
150 blob_it.forward();
151 UpdateRange(test_ymin, test_ymax, &ymin, &ymax);
152 } while (blob != end_it->data());
153 if (ymin < ymax) {
154 leftx = static_cast<int16_t>(std::floor(rightx - blobwidth));
155 if (leftx < box.left()) {
156 leftx = box.left(); // clip to real box
157 }
158 bl = ICOORD(leftx, static_cast<int16_t>(std::floor(ymin)));
159 tr = ICOORD(static_cast<int16_t>(std::ceil(rightx)), static_cast<int16_t>(std::ceil(ymax)));
160 if (blobindex == 0) {
161 box = TBOX(bl, tr); // change box
162 } else {
163 newblob = new BLOBNBOX;
164 // box is all it has
165 newblob->box = TBOX(bl, tr);
166 // stay on current
167 newblob->base_char_top_ = tr.y();
168 newblob->base_char_bottom_ = bl.y();
169 end_it->add_after_stay_put(newblob);
170 }
171 }
172 }
173 }
174 }
175
176 // Returns the box gaps between this and its neighbours_ in an array
177 // indexed by BlobNeighbourDir.
NeighbourGaps(int gaps[BND_COUNT]) const178 void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const {
179 for (int dir = 0; dir < BND_COUNT; ++dir) {
180 gaps[dir] = INT16_MAX;
181 BLOBNBOX *neighbour = neighbours_[dir];
182 if (neighbour != nullptr) {
183 const TBOX &n_box = neighbour->bounding_box();
184 if (dir == BND_LEFT || dir == BND_RIGHT) {
185 gaps[dir] = box.x_gap(n_box);
186 } else {
187 gaps[dir] = box.y_gap(n_box);
188 }
189 }
190 }
191 }
192 // Returns the min and max horizontal and vertical gaps (from NeighbourGaps)
193 // modified so that if the max exceeds the max dimension of the blob, and
194 // the min is less, the max is replaced with the min.
195 // The objective is to catch cases where there is only a single neighbour
196 // and avoid reporting the other gap as a ridiculously large number
MinMaxGapsClipped(int * h_min,int * h_max,int * v_min,int * v_max) const197 void BLOBNBOX::MinMaxGapsClipped(int *h_min, int *h_max, int *v_min, int *v_max) const {
198 int max_dimension = std::max(box.width(), box.height());
199 int gaps[BND_COUNT];
200 NeighbourGaps(gaps);
201 *h_min = std::min(gaps[BND_LEFT], gaps[BND_RIGHT]);
202 *h_max = std::max(gaps[BND_LEFT], gaps[BND_RIGHT]);
203 if (*h_max > max_dimension && *h_min < max_dimension) {
204 *h_max = *h_min;
205 }
206 *v_min = std::min(gaps[BND_ABOVE], gaps[BND_BELOW]);
207 *v_max = std::max(gaps[BND_ABOVE], gaps[BND_BELOW]);
208 if (*v_max > max_dimension && *v_min < max_dimension) {
209 *v_max = *v_min;
210 }
211 }
212
213 // Nulls out any neighbours that are DeletableNoise to remove references.
CleanNeighbours()214 void BLOBNBOX::CleanNeighbours() {
215 for (int dir = 0; dir < BND_COUNT; ++dir) {
216 BLOBNBOX *neighbour = neighbours_[dir];
217 if (neighbour != nullptr && neighbour->DeletableNoise()) {
218 neighbours_[dir] = nullptr;
219 good_stroke_neighbours_[dir] = false;
220 }
221 }
222 }
223
224 // Returns positive if there is at least one side neighbour that has a similar
225 // stroke width and is not on the other side of a rule line.
GoodTextBlob() const226 int BLOBNBOX::GoodTextBlob() const {
227 int score = 0;
228 for (int dir = 0; dir < BND_COUNT; ++dir) {
229 auto bnd = static_cast<BlobNeighbourDir>(dir);
230 if (good_stroke_neighbour(bnd)) {
231 ++score;
232 }
233 }
234 return score;
235 }
236
237 // Returns the number of side neighbours that are of type BRT_NOISE.
NoisyNeighbours() const238 int BLOBNBOX::NoisyNeighbours() const {
239 int count = 0;
240 for (int dir = 0; dir < BND_COUNT; ++dir) {
241 auto bnd = static_cast<BlobNeighbourDir>(dir);
242 BLOBNBOX *blob = neighbour(bnd);
243 if (blob != nullptr && blob->region_type() == BRT_NOISE) {
244 ++count;
245 }
246 }
247 return count;
248 }
249
250 // Returns true, and sets vert_possible/horz_possible if the blob has some
251 // feature that makes it individually appear to flow one way.
252 // eg if it has a high aspect ratio, yet has a complex shape, such as a
253 // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1 etc.
DefiniteIndividualFlow()254 bool BLOBNBOX::DefiniteIndividualFlow() {
255 if (cblob() == nullptr) {
256 return false;
257 }
258 int box_perimeter = 2 * (box.height() + box.width());
259 if (box.width() > box.height() * kDefiniteAspectRatio) {
260 // Attempt to distinguish a wide joined word from a dash.
261 // If it is a dash, then its perimeter is approximately
262 // 2 * (box width + stroke width), but more if the outline is noisy,
263 // so perimeter - 2*(box width + stroke width) should be close to zero.
264 // A complex shape such as a joined word should have a much larger value.
265 int perimeter = cblob()->perimeter();
266 if (vert_stroke_width() > 0 || perimeter <= 0) {
267 perimeter -= 2 * vert_stroke_width();
268 } else {
269 perimeter -= 4 * cblob()->area() / perimeter;
270 }
271 perimeter -= 2 * box.width();
272 // Use a multiple of the box perimeter as a threshold.
273 if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
274 set_vert_possible(false);
275 set_horz_possible(true);
276 return true;
277 }
278 }
279 if (box.height() > box.width() * kDefiniteAspectRatio) {
280 // As above, but for a putative vertical word vs a I/1/l.
281 int perimeter = cblob()->perimeter();
282 if (horz_stroke_width() > 0 || perimeter <= 0) {
283 perimeter -= 2 * horz_stroke_width();
284 } else {
285 perimeter -= 4 * cblob()->area() / perimeter;
286 }
287 perimeter -= 2 * box.height();
288 if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
289 set_vert_possible(true);
290 set_horz_possible(false);
291 return true;
292 }
293 }
294 return false;
295 }
296
297 // Returns true if there is no tabstop violation in merging this and other.
ConfirmNoTabViolation(const BLOBNBOX & other) const298 bool BLOBNBOX::ConfirmNoTabViolation(const BLOBNBOX &other) const {
299 if (box.left() < other.box.left() && box.left() < other.left_rule_) {
300 return false;
301 }
302 if (other.box.left() < box.left() && other.box.left() < left_rule_) {
303 return false;
304 }
305 if (box.right() > other.box.right() && box.right() > other.right_rule_) {
306 return false;
307 }
308 if (other.box.right() > box.right() && other.box.right() > right_rule_) {
309 return false;
310 }
311 return true;
312 }
313
314 // Returns true if other has a similar stroke width to this.
MatchingStrokeWidth(const BLOBNBOX & other,double fractional_tolerance,double constant_tolerance) const315 bool BLOBNBOX::MatchingStrokeWidth(const BLOBNBOX &other, double fractional_tolerance,
316 double constant_tolerance) const {
317 // The perimeter-based width is used as a backup in case there is
318 // no information in the blob.
319 double p_width = area_stroke_width();
320 double n_p_width = other.area_stroke_width();
321 float h_tolerance = horz_stroke_width_ * fractional_tolerance + constant_tolerance;
322 float v_tolerance = vert_stroke_width_ * fractional_tolerance + constant_tolerance;
323 double p_tolerance = p_width * fractional_tolerance + constant_tolerance;
324 bool h_zero = horz_stroke_width_ == 0.0f || other.horz_stroke_width_ == 0.0f;
325 bool v_zero = vert_stroke_width_ == 0.0f || other.vert_stroke_width_ == 0.0f;
326 bool h_ok = !h_zero && NearlyEqual(horz_stroke_width_, other.horz_stroke_width_, h_tolerance);
327 bool v_ok = !v_zero && NearlyEqual(vert_stroke_width_, other.vert_stroke_width_, v_tolerance);
328 bool p_ok = h_zero && v_zero && NearlyEqual(p_width, n_p_width, p_tolerance);
329 // For a match, at least one of the horizontal and vertical widths
330 // must match, and the other one must either match or be zero.
331 // Only if both are zero will we look at the perimeter metric.
332 return p_ok || ((v_ok || h_ok) && (h_ok || h_zero) && (v_ok || v_zero));
333 }
334
335 // Returns a bounding box of the outline contained within the
336 // given horizontal range.
BoundsWithinLimits(int left,int right)337 TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) {
338 FCOORD no_rotation(1.0f, 0.0f);
339 float top = box.top();
340 float bottom = box.bottom();
341 if (cblob_ptr != nullptr) {
342 find_cblob_limits(cblob_ptr, static_cast<float>(left), static_cast<float>(right), no_rotation,
343 bottom, top);
344 }
345
346 if (top < bottom) {
347 top = box.top();
348 bottom = box.bottom();
349 }
350 FCOORD bot_left(left, bottom);
351 FCOORD top_right(right, top);
352 TBOX shrunken_box(bot_left);
353 TBOX shrunken_box2(top_right);
354 shrunken_box += shrunken_box2;
355 return shrunken_box;
356 }
357
358 // Estimates and stores the baseline position based on the shape of the
359 // outline.
EstimateBaselinePosition()360 void BLOBNBOX::EstimateBaselinePosition() {
361 baseline_y_ = box.bottom(); // The default.
362 if (cblob_ptr == nullptr) {
363 return;
364 }
365 baseline_y_ = cblob_ptr->EstimateBaselinePosition();
366 }
367
368 // Helper to call CleanNeighbours on all blobs on the list.
CleanNeighbours(BLOBNBOX_LIST * blobs)369 void BLOBNBOX::CleanNeighbours(BLOBNBOX_LIST *blobs) {
370 BLOBNBOX_IT blob_it(blobs);
371 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
372 blob_it.data()->CleanNeighbours();
373 }
374 }
375
376 // Helper to delete all the deletable blobs on the list.
DeleteNoiseBlobs(BLOBNBOX_LIST * blobs)377 void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST *blobs) {
378 BLOBNBOX_IT blob_it(blobs);
379 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
380 BLOBNBOX *blob = blob_it.data();
381 if (blob->DeletableNoise()) {
382 delete blob->remove_cblob();
383 delete blob_it.extract();
384 }
385 }
386 }
387
388 // Helper to compute edge offsets for all the blobs on the list.
389 // See coutln.h for an explanation of edge offsets.
ComputeEdgeOffsets(Image thresholds,Image grey,BLOBNBOX_LIST * blobs)390 void BLOBNBOX::ComputeEdgeOffsets(Image thresholds, Image grey, BLOBNBOX_LIST *blobs) {
391 int grey_height = 0;
392 int thr_height = 0;
393 int scale_factor = 1;
394 if (thresholds != nullptr && grey != nullptr) {
395 grey_height = pixGetHeight(grey);
396 thr_height = pixGetHeight(thresholds);
397 scale_factor = IntCastRounded(static_cast<double>(grey_height) / thr_height);
398 }
399 BLOBNBOX_IT blob_it(blobs);
400 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
401 BLOBNBOX *blob = blob_it.data();
402 if (blob->cblob() != nullptr) {
403 // Get the threshold that applies to this blob.
404 l_uint32 threshold = 128;
405 if (thresholds != nullptr && grey != nullptr) {
406 const TBOX &box = blob->cblob()->bounding_box();
407 // Transform the coordinates if required.
408 TPOINT pt((box.left() + box.right()) / 2, (box.top() + box.bottom()) / 2);
409 pixGetPixel(thresholds, pt.x / scale_factor, thr_height - 1 - pt.y / scale_factor,
410 &threshold);
411 }
412 blob->cblob()->ComputeEdgeOffsets(threshold, grey);
413 }
414 }
415 }
416
417 #ifndef GRAPHICS_DISABLED
418 // Helper to draw all the blobs on the list in the given body_colour,
419 // with child outlines in the child_colour.
PlotBlobs(BLOBNBOX_LIST * list,ScrollView::Color body_colour,ScrollView::Color child_colour,ScrollView * win)420 void BLOBNBOX::PlotBlobs(BLOBNBOX_LIST *list, ScrollView::Color body_colour,
421 ScrollView::Color child_colour, ScrollView *win) {
422 BLOBNBOX_IT it(list);
423 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
424 it.data()->plot(win, body_colour, child_colour);
425 }
426 }
427
428 // Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the
429 // given list in the given body_colour, with child outlines in the
430 // child_colour.
PlotNoiseBlobs(BLOBNBOX_LIST * list,ScrollView::Color body_colour,ScrollView::Color child_colour,ScrollView * win)431 void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST *list, ScrollView::Color body_colour,
432 ScrollView::Color child_colour, ScrollView *win) {
433 BLOBNBOX_IT it(list);
434 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
435 BLOBNBOX *blob = it.data();
436 if (blob->DeletableNoise()) {
437 blob->plot(win, body_colour, child_colour);
438 }
439 }
440 }
441
TextlineColor(BlobRegionType region_type,BlobTextFlowType flow_type)442 ScrollView::Color BLOBNBOX::TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type) {
443 switch (region_type) {
444 case BRT_HLINE:
445 return ScrollView::BROWN;
446 case BRT_VLINE:
447 return ScrollView::DARK_GREEN;
448 case BRT_RECTIMAGE:
449 return ScrollView::RED;
450 case BRT_POLYIMAGE:
451 return ScrollView::ORANGE;
452 case BRT_UNKNOWN:
453 return flow_type == BTFT_NONTEXT ? ScrollView::CYAN : ScrollView::WHITE;
454 case BRT_VERT_TEXT:
455 if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE) {
456 return ScrollView::GREEN;
457 }
458 if (flow_type == BTFT_CHAIN) {
459 return ScrollView::LIME_GREEN;
460 }
461 return ScrollView::YELLOW;
462 case BRT_TEXT:
463 if (flow_type == BTFT_STRONG_CHAIN) {
464 return ScrollView::BLUE;
465 }
466 if (flow_type == BTFT_TEXT_ON_IMAGE) {
467 return ScrollView::LIGHT_BLUE;
468 }
469 if (flow_type == BTFT_CHAIN) {
470 return ScrollView::MEDIUM_BLUE;
471 }
472 if (flow_type == BTFT_LEADER) {
473 return ScrollView::WHEAT;
474 }
475 if (flow_type == BTFT_NONTEXT) {
476 return ScrollView::PINK;
477 }
478 return ScrollView::MAGENTA;
479 default:
480 return ScrollView::GREY;
481 }
482 }
483
484 // Keep in sync with BlobRegionType.
BoxColor() const485 ScrollView::Color BLOBNBOX::BoxColor() const {
486 return TextlineColor(region_type_, flow_);
487 }
488
plot(ScrollView * window,ScrollView::Color blob_colour,ScrollView::Color child_colour)489 void BLOBNBOX::plot(ScrollView *window, // window to draw in
490 ScrollView::Color blob_colour, // for outer bits
491 ScrollView::Color child_colour) { // for holes
492 if (cblob_ptr != nullptr) {
493 cblob_ptr->plot(window, blob_colour, child_colour);
494 }
495 }
496 #endif
497 /**********************************************************************
498 * find_cblob_limits
499 *
500 * Scan the outlines of the cblob to locate the y min and max
501 * between the given x limits.
502 **********************************************************************/
503
find_cblob_limits(C_BLOB * blob,float leftx,float rightx,FCOORD rotation,float & ymin,float & ymax)504 void find_cblob_limits( // get y limits
505 C_BLOB *blob, // blob to search
506 float leftx, // x limits
507 float rightx,
508 FCOORD rotation, // for landscape
509 float &ymin, // output y limits
510 float &ymax) {
511 int16_t stepindex; // current point
512 ICOORD pos; // current coords
513 ICOORD vec; // rotated step
514 C_OUTLINE *outline; // current outline
515 // outlines
516 C_OUTLINE_IT out_it = blob->out_list();
517
518 ymin = static_cast<float>(INT32_MAX);
519 ymax = static_cast<float>(-INT32_MAX);
520 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
521 outline = out_it.data();
522 pos = outline->start_pos(); // get coords
523 pos.rotate(rotation);
524 for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) {
525 // inside
526 if (pos.x() >= leftx && pos.x() <= rightx) {
527 UpdateRange(pos.y(), &ymin, &ymax);
528 }
529 vec = outline->step(stepindex);
530 vec.rotate(rotation);
531 pos += vec; // move to next
532 }
533 }
534 }
535
536 /**********************************************************************
537 * find_cblob_vlimits
538 *
539 * Scan the outlines of the cblob to locate the y min and max
540 * between the given x limits.
541 **********************************************************************/
542
find_cblob_vlimits(C_BLOB * blob,float leftx,float rightx,float & ymin,float & ymax)543 void find_cblob_vlimits( // get y limits
544 C_BLOB *blob, // blob to search
545 float leftx, // x limits
546 float rightx,
547 float &ymin, // output y limits
548 float &ymax) {
549 int16_t stepindex; // current point
550 ICOORD pos; // current coords
551 ICOORD vec; // rotated step
552 C_OUTLINE *outline; // current outline
553 // outlines
554 C_OUTLINE_IT out_it = blob->out_list();
555
556 ymin = static_cast<float>(INT32_MAX);
557 ymax = static_cast<float>(-INT32_MAX);
558 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
559 outline = out_it.data();
560 pos = outline->start_pos(); // get coords
561 for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) {
562 // inside
563 if (pos.x() >= leftx && pos.x() <= rightx) {
564 UpdateRange(pos.y(), &ymin, &ymax);
565 }
566 vec = outline->step(stepindex);
567 pos += vec; // move to next
568 }
569 }
570 }
571
572 /**********************************************************************
573 * find_cblob_hlimits
574 *
575 * Scan the outlines of the cblob to locate the x min and max
576 * between the given y limits.
577 **********************************************************************/
578
find_cblob_hlimits(C_BLOB * blob,float bottomy,float topy,float & xmin,float & xmax)579 void find_cblob_hlimits( // get x limits
580 C_BLOB *blob, // blob to search
581 float bottomy, // y limits
582 float topy,
583 float &xmin, // output x limits
584 float &xmax) {
585 int16_t stepindex; // current point
586 ICOORD pos; // current coords
587 ICOORD vec; // rotated step
588 C_OUTLINE *outline; // current outline
589 // outlines
590 C_OUTLINE_IT out_it = blob->out_list();
591
592 xmin = static_cast<float>(INT32_MAX);
593 xmax = static_cast<float>(-INT32_MAX);
594 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
595 outline = out_it.data();
596 pos = outline->start_pos(); // get coords
597 for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) {
598 // inside
599 if (pos.y() >= bottomy && pos.y() <= topy) {
600 UpdateRange(pos.x(), &xmin, &xmax);
601 }
602 vec = outline->step(stepindex);
603 pos += vec; // move to next
604 }
605 }
606 }
607
608 /**********************************************************************
609 * crotate_cblob
610 *
611 * Rotate the copy by the given vector and return a C_BLOB.
612 **********************************************************************/
613
crotate_cblob(C_BLOB * blob,FCOORD rotation)614 C_BLOB *crotate_cblob( // rotate it
615 C_BLOB *blob, // blob to search
616 FCOORD rotation // for landscape
617 ) {
618 C_OUTLINE_LIST out_list; // output outlines
619 // input outlines
620 C_OUTLINE_IT in_it = blob->out_list();
621 // output outlines
622 C_OUTLINE_IT out_it = &out_list;
623
624 for (in_it.mark_cycle_pt(); !in_it.cycled_list(); in_it.forward()) {
625 out_it.add_after_then_move(new C_OUTLINE(in_it.data(), rotation));
626 }
627 return new C_BLOB(&out_list);
628 }
629
630 /**********************************************************************
631 * box_next
632 *
633 * Compute the bounding box of this blob with merging of x overlaps
634 * but no pre-chopping.
635 * Then move the iterator on to the start of the next blob.
636 **********************************************************************/
637
box_next(BLOBNBOX_IT * it)638 TBOX box_next( // get bounding box
639 BLOBNBOX_IT *it // iterator to blobds
640 ) {
641 BLOBNBOX *blob; // current blob
642 TBOX result; // total box
643
644 blob = it->data();
645 result = blob->bounding_box();
646 do {
647 it->forward();
648 blob = it->data();
649 if (blob->cblob() == nullptr) {
650 // was pre-chopped
651 result += blob->bounding_box();
652 }
653 }
654 // until next real blob
655 while ((blob->cblob() == nullptr) || blob->joined_to_prev());
656 return result;
657 }
658
659 /**********************************************************************
660 * box_next_pre_chopped
661 *
662 * Compute the bounding box of this blob with merging of x overlaps
663 * but WITH pre-chopping.
664 * Then move the iterator on to the start of the next pre-chopped blob.
665 **********************************************************************/
666
box_next_pre_chopped(BLOBNBOX_IT * it)667 TBOX box_next_pre_chopped( // get bounding box
668 BLOBNBOX_IT *it // iterator to blobds
669 ) {
670 BLOBNBOX *blob; // current blob
671 TBOX result; // total box
672
673 blob = it->data();
674 result = blob->bounding_box();
675 do {
676 it->forward();
677 blob = it->data();
678 }
679 // until next real blob
680 while (blob->joined_to_prev());
681 return result;
682 }
683
684 /**********************************************************************
685 * TO_ROW::TO_ROW
686 *
687 * Constructor to make a row from a blob.
688 **********************************************************************/
689
TO_ROW(BLOBNBOX * blob,float top,float bottom,float row_size)690 TO_ROW::TO_ROW( // constructor
691 BLOBNBOX *blob, // first blob
692 float top, // corrected top
693 float bottom, // of row
694 float row_size // ideal
695 ) {
696 clear();
697 y_min = bottom;
698 y_max = top;
699 initial_y_min = bottom;
700
701 float diff; // in size
702 BLOBNBOX_IT it = &blobs; // list of blobs
703
704 it.add_to_end(blob);
705 diff = top - bottom - row_size;
706 if (diff > 0) {
707 y_max -= diff / 2;
708 y_min += diff / 2;
709 }
710 // very small object
711 else if ((top - bottom) * 3 < row_size) {
712 diff = row_size / 3 + bottom - top;
713 y_max += diff / 2;
714 y_min -= diff / 2;
715 }
716 }
717
print() const718 void TO_ROW::print() const {
719 tprintf(
720 "pitch=%d, fp=%g, fps=%g, fpns=%g, prs=%g, prns=%g,"
721 " spacing=%g xh=%g y_origin=%g xev=%d, asc=%g, desc=%g,"
722 " body=%g, minsp=%d maxnsp=%d, thr=%d kern=%g sp=%g\n",
723 pitch_decision, fixed_pitch, fp_space, fp_nonsp, pr_space, pr_nonsp, spacing, xheight,
724 y_origin, xheight_evidence, ascrise, descdrop, body_size, min_space, max_nonspace,
725 space_threshold, kern_size, space_size);
726 }
727
728 /**********************************************************************
729 * TO_ROW:add_blob
730 *
731 * Add the blob to the end of the row.
732 **********************************************************************/
733
add_blob(BLOBNBOX * blob,float top,float bottom,float row_size)734 void TO_ROW::add_blob( // constructor
735 BLOBNBOX *blob, // first blob
736 float top, // corrected top
737 float bottom, // of row
738 float row_size // ideal
739 ) {
740 float allowed; // allowed expansion
741 float available; // expansion
742 BLOBNBOX_IT it = &blobs; // list of blobs
743
744 it.add_to_end(blob);
745 allowed = row_size + y_min - y_max;
746 if (allowed > 0) {
747 available = top > y_max ? top - y_max : 0;
748 if (bottom < y_min) {
749 // total available
750 available += y_min - bottom;
751 }
752 if (available > 0) {
753 available += available; // do it gradually
754 if (available < allowed) {
755 available = allowed;
756 }
757 if (bottom < y_min) {
758 y_min -= (y_min - bottom) * allowed / available;
759 }
760 if (top > y_max) {
761 y_max += (top - y_max) * allowed / available;
762 }
763 }
764 }
765 }
766
767 /**********************************************************************
768 * TO_ROW:insert_blob
769 *
770 * Add the blob to the row in the correct position.
771 **********************************************************************/
772
insert_blob(BLOBNBOX * blob)773 void TO_ROW::insert_blob( // constructor
774 BLOBNBOX *blob // first blob
775 ) {
776 BLOBNBOX_IT it = &blobs; // list of blobs
777
778 if (it.empty()) {
779 it.add_before_then_move(blob);
780 } else {
781 it.mark_cycle_pt();
782 while (!it.cycled_list() && it.data()->bounding_box().left() <= blob->bounding_box().left()) {
783 it.forward();
784 }
785 if (it.cycled_list()) {
786 it.add_to_end(blob);
787 } else {
788 it.add_before_stay_put(blob);
789 }
790 }
791 }
792
793 /**********************************************************************
794 * TO_ROW::compute_vertical_projection
795 *
796 * Compute the vertical projection of a TO_ROW from its blobs.
797 **********************************************************************/
798
compute_vertical_projection()799 void TO_ROW::compute_vertical_projection() { // project whole row
800 TBOX row_box; // bound of row
801 BLOBNBOX *blob; // current blob
802 TBOX blob_box; // bounding box
803 BLOBNBOX_IT blob_it = blob_list();
804
805 if (blob_it.empty()) {
806 return;
807 }
808 row_box = blob_it.data()->bounding_box();
809 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
810 row_box += blob_it.data()->bounding_box();
811 }
812
813 projection.set_range(row_box.left() - PROJECTION_MARGIN, row_box.right() + PROJECTION_MARGIN);
814 projection_left = row_box.left() - PROJECTION_MARGIN;
815 projection_right = row_box.right() + PROJECTION_MARGIN;
816 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
817 blob = blob_it.data();
818 if (blob->cblob() != nullptr) {
819 vertical_cblob_projection(blob->cblob(), &projection);
820 }
821 }
822 }
823
824 /**********************************************************************
825 * TO_ROW::clear
826 *
827 * Zero out all scalar members.
828 **********************************************************************/
clear()829 void TO_ROW::clear() {
830 all_caps = false;
831 used_dm_model = false;
832 projection_left = 0;
833 projection_right = 0;
834 pitch_decision = PITCH_DUNNO;
835 fixed_pitch = 0.0;
836 fp_space = 0.0;
837 fp_nonsp = 0.0;
838 pr_space = 0.0;
839 pr_nonsp = 0.0;
840 spacing = 0.0;
841 xheight = 0.0;
842 xheight_evidence = 0;
843 body_size = 0.0;
844 ascrise = 0.0;
845 descdrop = 0.0;
846 min_space = 0;
847 max_nonspace = 0;
848 space_threshold = 0;
849 kern_size = 0.0;
850 space_size = 0.0;
851 y_min = 0.0;
852 y_max = 0.0;
853 initial_y_min = 0.0;
854 m = 0.0;
855 c = 0.0;
856 error = 0.0;
857 para_c = 0.0;
858 para_error = 0.0;
859 y_origin = 0.0;
860 credibility = 0.0;
861 num_repeated_sets_ = -1;
862 }
863
864 /**********************************************************************
865 * vertical_cblob_projection
866 *
867 * Compute the vertical projection of a cblob from its outlines
868 * and add to the given STATS.
869 **********************************************************************/
870
vertical_cblob_projection(C_BLOB * blob,STATS * stats)871 void vertical_cblob_projection( // project outlines
872 C_BLOB *blob, // blob to project
873 STATS *stats // output
874 ) {
875 // outlines of blob
876 C_OUTLINE_IT out_it = blob->out_list();
877
878 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
879 vertical_coutline_projection(out_it.data(), stats);
880 }
881 }
882
883 /**********************************************************************
884 * vertical_coutline_projection
885 *
886 * Compute the vertical projection of a outline from its outlines
887 * and add to the given STATS.
888 **********************************************************************/
889
vertical_coutline_projection(C_OUTLINE * outline,STATS * stats)890 void vertical_coutline_projection( // project outlines
891 C_OUTLINE *outline, // outline to project
892 STATS *stats // output
893 ) {
894 ICOORD pos; // current point
895 ICOORD step; // edge step
896 int32_t length; // of outline
897 int16_t stepindex; // current step
898 C_OUTLINE_IT out_it = outline->child();
899
900 pos = outline->start_pos();
901 length = outline->pathlength();
902 for (stepindex = 0; stepindex < length; stepindex++) {
903 step = outline->step(stepindex);
904 if (step.x() > 0) {
905 stats->add(pos.x(), -pos.y());
906 } else if (step.x() < 0) {
907 stats->add(pos.x() - 1, pos.y());
908 }
909 pos += step;
910 }
911
912 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
913 vertical_coutline_projection(out_it.data(), stats);
914 }
915 }
916
917 /**********************************************************************
918 * TO_BLOCK::TO_BLOCK
919 *
920 * Constructor to make a TO_BLOCK from a real block.
921 **********************************************************************/
922
TO_BLOCK(BLOCK * src_block)923 TO_BLOCK::TO_BLOCK( // make a block
924 BLOCK *src_block // real block
925 ) {
926 clear();
927 block = src_block;
928 }
929
930 /**********************************************************************
931 * TO_BLOCK::clear
932 *
933 * Zero out all scalar members.
934 **********************************************************************/
clear()935 void TO_BLOCK::clear() {
936 block = nullptr;
937 pitch_decision = PITCH_DUNNO;
938 line_spacing = 0.0;
939 line_size = 0.0;
940 max_blob_size = 0.0;
941 baseline_offset = 0.0;
942 xheight = 0.0;
943 fixed_pitch = 0.0;
944 kern_size = 0.0;
945 space_size = 0.0;
946 min_space = 0;
947 max_nonspace = 0;
948 fp_space = 0.0;
949 fp_nonsp = 0.0;
950 pr_space = 0.0;
951 pr_nonsp = 0.0;
952 key_row = nullptr;
953 }
954
~TO_BLOCK()955 TO_BLOCK::~TO_BLOCK() {
956 // Any residual BLOBNBOXes at this stage own their blobs, so delete them.
957 BLOBNBOX::clear_blobnboxes(&blobs);
958 BLOBNBOX::clear_blobnboxes(&underlines);
959 BLOBNBOX::clear_blobnboxes(&noise_blobs);
960 BLOBNBOX::clear_blobnboxes(&small_blobs);
961 BLOBNBOX::clear_blobnboxes(&large_blobs);
962 }
963
964 // Helper function to divide the input blobs over noise, small, medium
965 // and large lists. Blobs small in height and (small in width or large in width)
966 // go in the noise list. Dash (-) candidates go in the small list, and
967 // medium and large are by height.
968 // SIDE-EFFECT: reset all blobs to initial state by calling Init().
SizeFilterBlobs(int min_height,int max_height,BLOBNBOX_LIST * src_list,BLOBNBOX_LIST * noise_list,BLOBNBOX_LIST * small_list,BLOBNBOX_LIST * medium_list,BLOBNBOX_LIST * large_list)969 static void SizeFilterBlobs(int min_height, int max_height, BLOBNBOX_LIST *src_list,
970 BLOBNBOX_LIST *noise_list, BLOBNBOX_LIST *small_list,
971 BLOBNBOX_LIST *medium_list, BLOBNBOX_LIST *large_list) {
972 BLOBNBOX_IT noise_it(noise_list);
973 BLOBNBOX_IT small_it(small_list);
974 BLOBNBOX_IT medium_it(medium_list);
975 BLOBNBOX_IT large_it(large_list);
976 for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) {
977 BLOBNBOX *blob = src_it.extract();
978 blob->ReInit();
979 int width = blob->bounding_box().width();
980 int height = blob->bounding_box().height();
981 if (height < min_height && (width < min_height || width > max_height)) {
982 noise_it.add_after_then_move(blob);
983 } else if (height > max_height) {
984 large_it.add_after_then_move(blob);
985 } else if (height < min_height) {
986 small_it.add_after_then_move(blob);
987 } else {
988 medium_it.add_after_then_move(blob);
989 }
990 }
991 }
992
993 // Reorganize the blob lists with a different definition of small, medium
994 // and large, compared to the original definition.
995 // Height is still the primary filter key, but medium width blobs of small
996 // height become small, and very wide blobs of small height stay noise, along
997 // with small dot-shaped blobs.
ReSetAndReFilterBlobs()998 void TO_BLOCK::ReSetAndReFilterBlobs() {
999 int min_height = IntCastRounded(kMinMediumSizeRatio * line_size);
1000 int max_height = IntCastRounded(kMaxMediumSizeRatio * line_size);
1001 BLOBNBOX_LIST noise_list;
1002 BLOBNBOX_LIST small_list;
1003 BLOBNBOX_LIST medium_list;
1004 BLOBNBOX_LIST large_list;
1005 SizeFilterBlobs(min_height, max_height, &blobs, &noise_list, &small_list, &medium_list,
1006 &large_list);
1007 SizeFilterBlobs(min_height, max_height, &large_blobs, &noise_list, &small_list, &medium_list,
1008 &large_list);
1009 SizeFilterBlobs(min_height, max_height, &small_blobs, &noise_list, &small_list, &medium_list,
1010 &large_list);
1011 SizeFilterBlobs(min_height, max_height, &noise_blobs, &noise_list, &small_list, &medium_list,
1012 &large_list);
1013 BLOBNBOX_IT blob_it(&blobs);
1014 blob_it.add_list_after(&medium_list);
1015 blob_it.set_to_list(&large_blobs);
1016 blob_it.add_list_after(&large_list);
1017 blob_it.set_to_list(&small_blobs);
1018 blob_it.add_list_after(&small_list);
1019 blob_it.set_to_list(&noise_blobs);
1020 blob_it.add_list_after(&noise_list);
1021 }
1022
1023 // Deletes noise blobs from all lists where not owned by a ColPartition.
DeleteUnownedNoise()1024 void TO_BLOCK::DeleteUnownedNoise() {
1025 BLOBNBOX::CleanNeighbours(&blobs);
1026 BLOBNBOX::CleanNeighbours(&small_blobs);
1027 BLOBNBOX::CleanNeighbours(&noise_blobs);
1028 BLOBNBOX::CleanNeighbours(&large_blobs);
1029 BLOBNBOX::DeleteNoiseBlobs(&blobs);
1030 BLOBNBOX::DeleteNoiseBlobs(&small_blobs);
1031 BLOBNBOX::DeleteNoiseBlobs(&noise_blobs);
1032 BLOBNBOX::DeleteNoiseBlobs(&large_blobs);
1033 }
1034
1035 // Computes and stores the edge offsets on each blob for use in feature
1036 // extraction, using greyscale if the supplied grey and thresholds pixes
1037 // are 8-bit or otherwise (if nullptr or not 8 bit) the original binary
1038 // edge step outlines.
1039 // Thresholds must either be the same size as grey or an integer down-scale
1040 // of grey.
1041 // See coutln.h for an explanation of edge offsets.
ComputeEdgeOffsets(Image thresholds,Image grey)1042 void TO_BLOCK::ComputeEdgeOffsets(Image thresholds, Image grey) {
1043 BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &blobs);
1044 BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &small_blobs);
1045 BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &noise_blobs);
1046 }
1047
1048 #ifndef GRAPHICS_DISABLED
1049 // Draw the noise blobs from all lists in red.
plot_noise_blobs(ScrollView * win)1050 void TO_BLOCK::plot_noise_blobs(ScrollView *win) {
1051 BLOBNBOX::PlotNoiseBlobs(&noise_blobs, ScrollView::RED, ScrollView::RED, win);
1052 BLOBNBOX::PlotNoiseBlobs(&small_blobs, ScrollView::RED, ScrollView::RED, win);
1053 BLOBNBOX::PlotNoiseBlobs(&large_blobs, ScrollView::RED, ScrollView::RED, win);
1054 BLOBNBOX::PlotNoiseBlobs(&blobs, ScrollView::RED, ScrollView::RED, win);
1055 }
1056
1057 // Draw the blobs on the various lists in the block in different colors.
plot_graded_blobs(ScrollView * win)1058 void TO_BLOCK::plot_graded_blobs(ScrollView *win) {
1059 BLOBNBOX::PlotBlobs(&noise_blobs, ScrollView::CORAL, ScrollView::BLUE, win);
1060 BLOBNBOX::PlotBlobs(&small_blobs, ScrollView::GOLDENROD, ScrollView::YELLOW, win);
1061 BLOBNBOX::PlotBlobs(&large_blobs, ScrollView::DARK_GREEN, ScrollView::YELLOW, win);
1062 BLOBNBOX::PlotBlobs(&blobs, ScrollView::WHITE, ScrollView::BROWN, win);
1063 }
1064
1065 /**********************************************************************
1066 * plot_blob_list
1067 *
1068 * Draw a list of blobs.
1069 **********************************************************************/
1070
plot_blob_list(ScrollView * win,BLOBNBOX_LIST * list,ScrollView::Color body_colour,ScrollView::Color child_colour)1071 void plot_blob_list(ScrollView *win, // window to draw in
1072 BLOBNBOX_LIST *list, // blob list
1073 ScrollView::Color body_colour, // colour to draw
1074 ScrollView::Color child_colour) { // colour of child
1075 BLOBNBOX_IT it = list;
1076 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1077 it.data()->plot(win, body_colour, child_colour);
1078 }
1079 }
1080 #endif // !GRAPHICS_DISABLED
1081
1082 } // namespace tesseract
1083