1 /**********************************************************************
2  * File:        blobbox.cpp  (Formerly blobnbox.c)
3  * Description: Code for the textord blob class.
4  * Author:      Ray Smith
5  *
6  * (C) Copyright 1992, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 // Include automatically generated configuration file if running autoconf.
20 #ifdef HAVE_CONFIG_H
21 #  include "config_auto.h"
22 #endif
23 
24 #include "blobbox.h"
25 #include "blobs.h"   // for TPOINT
26 #include "coutln.h"  // for C_OUTLINE_IT, C_OUTLINE, C_OUTLINE_LIST
27 #include "environ.h" // for l_uint32
28 #include "host.h"    // for NearlyEqual
29 #include "points.h"  // for operator+=, ICOORD::rotate
30 
31 #include "helpers.h" // for UpdateRange, IntCastRounded
32 
33 #include <allheaders.h> // for pixGetHeight, pixGetPixel
34 
35 #include <algorithm> // for max, min
36 #include <cmath>
37 #include <cstdint>   // for INT32_MAX, INT16_MAX
38 
39 #define PROJECTION_MARGIN 10 // arbitrary
40 
41 namespace tesseract {
42 
43 // Up to 30 degrees is allowed for rotations of diacritic blobs.
44 const double kCosSmallAngle = 0.866;
45 // Min aspect ratio for a joined word to indicate an obvious flow direction.
46 const double kDefiniteAspectRatio = 2.0;
47 // Multiple of short length in perimeter to make a joined word.
48 const double kComplexShapePerimeterRatio = 1.5;
49 // Min multiple of linesize for medium-sized blobs in ReFilterBlobs.
50 const double kMinMediumSizeRatio = 0.25;
51 // Max multiple of linesize for medium-sized blobs in ReFilterBlobs.
52 const double kMaxMediumSizeRatio = 4.0;
53 
54 // Rotates the box and the underlying blob.
rotate(FCOORD rotation)55 void BLOBNBOX::rotate(FCOORD rotation) {
56   cblob_ptr->rotate(rotation);
57   rotate_box(rotation);
58   compute_bounding_box();
59 }
60 
61 // Reflect the box in the y-axis, leaving the underlying blob untouched.
reflect_box_in_y_axis()62 void BLOBNBOX::reflect_box_in_y_axis() {
63   int left = -box.right();
64   box.set_right(-box.left());
65   box.set_left(left);
66 }
67 
68 // Rotates the box by the angle given by rotation.
69 // If the blob is a diacritic, then only small rotations for skew
70 // correction can be applied.
rotate_box(FCOORD rotation)71 void BLOBNBOX::rotate_box(FCOORD rotation) {
72   if (IsDiacritic()) {
73     ASSERT_HOST(rotation.x() >= kCosSmallAngle);
74     ICOORD top_pt((box.left() + box.right()) / 2, base_char_top_);
75     ICOORD bottom_pt(top_pt.x(), base_char_bottom_);
76     top_pt.rotate(rotation);
77     base_char_top_ = top_pt.y();
78     bottom_pt.rotate(rotation);
79     base_char_bottom_ = bottom_pt.y();
80     box.rotate(rotation);
81   } else {
82     box.rotate(rotation);
83     set_diacritic_box(box);
84   }
85 }
86 
87 /**********************************************************************
88  * BLOBNBOX::merge
89  *
90  * Merge this blob with the given blob, which should be after this.
91  **********************************************************************/
merge(BLOBNBOX * nextblob)92 void BLOBNBOX::merge(  // merge blobs
93     BLOBNBOX *nextblob // blob to join with
94 ) {
95   box += nextblob->box; // merge boxes
96   set_diacritic_box(box);
97   nextblob->joined = true;
98 }
99 
100 // Merge this with other, taking the outlines from other.
101 // Other is not deleted, but left for the caller to handle.
really_merge(BLOBNBOX * other)102 void BLOBNBOX::really_merge(BLOBNBOX *other) {
103   if (other->cblob_ptr != nullptr) {
104     C_OUTLINE_IT ol_it(cblob_ptr->out_list());
105     ol_it.add_list_after(other->cblob_ptr->out_list());
106   }
107   compute_bounding_box();
108 }
109 
110 /**********************************************************************
111  * BLOBNBOX::chop
112  *
113  * Chop this blob into equal sized pieces using the x height as a guide.
114  * The blob is not actually chopped. Instead, fake blobs are inserted
115  * with the relevant bounding boxes.
116  **********************************************************************/
117 
chop(BLOBNBOX_IT * start_it,BLOBNBOX_IT * end_it,FCOORD rotation,float xheight)118 void BLOBNBOX::chop(       // chop blobs
119     BLOBNBOX_IT *start_it, // location of this
120     BLOBNBOX_IT *end_it,   // iterator
121     FCOORD rotation,       // for landscape
122     float xheight          // of line
123 ) {
124   int16_t blobcount;          // no of blobs
125   BLOBNBOX *newblob;          // fake blob
126   BLOBNBOX *blob;             // current blob
127   int16_t blobindex;          // number of chop
128   int16_t leftx;              // left edge of blob
129   float blobwidth;            // width of each
130   float rightx;               // right edge to scan
131   float ymin, ymax;           // limits of new blob
132   float test_ymin, test_ymax; // limits of part blob
133   ICOORD bl, tr;              // corners of box
134   BLOBNBOX_IT blob_it;        // blob iterator
135 
136   // get no of chops
137   blobcount = static_cast<int16_t>(std::floor(box.width() / xheight));
138   if (blobcount > 1 && cblob_ptr != nullptr) {
139     // width of each
140     blobwidth = static_cast<float>(box.width() + 1) / blobcount;
141     for (blobindex = blobcount - 1, rightx = box.right(); blobindex >= 0;
142          blobindex--, rightx -= blobwidth) {
143       ymin = static_cast<float>(INT32_MAX);
144       ymax = static_cast<float>(-INT32_MAX);
145       blob_it = *start_it;
146       do {
147         blob = blob_it.data();
148         find_cblob_vlimits(blob->cblob_ptr, rightx - blobwidth, rightx,
149                            /*rotation, */ test_ymin, test_ymax);
150         blob_it.forward();
151         UpdateRange(test_ymin, test_ymax, &ymin, &ymax);
152       } while (blob != end_it->data());
153       if (ymin < ymax) {
154         leftx = static_cast<int16_t>(std::floor(rightx - blobwidth));
155         if (leftx < box.left()) {
156           leftx = box.left(); // clip to real box
157         }
158         bl = ICOORD(leftx, static_cast<int16_t>(std::floor(ymin)));
159         tr = ICOORD(static_cast<int16_t>(std::ceil(rightx)), static_cast<int16_t>(std::ceil(ymax)));
160         if (blobindex == 0) {
161           box = TBOX(bl, tr); // change box
162         } else {
163           newblob = new BLOBNBOX;
164           // box is all it has
165           newblob->box = TBOX(bl, tr);
166           // stay on current
167           newblob->base_char_top_ = tr.y();
168           newblob->base_char_bottom_ = bl.y();
169           end_it->add_after_stay_put(newblob);
170         }
171       }
172     }
173   }
174 }
175 
176 // Returns the box gaps between this and its neighbours_ in an array
177 // indexed by BlobNeighbourDir.
NeighbourGaps(int gaps[BND_COUNT]) const178 void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const {
179   for (int dir = 0; dir < BND_COUNT; ++dir) {
180     gaps[dir] = INT16_MAX;
181     BLOBNBOX *neighbour = neighbours_[dir];
182     if (neighbour != nullptr) {
183       const TBOX &n_box = neighbour->bounding_box();
184       if (dir == BND_LEFT || dir == BND_RIGHT) {
185         gaps[dir] = box.x_gap(n_box);
186       } else {
187         gaps[dir] = box.y_gap(n_box);
188       }
189     }
190   }
191 }
192 // Returns the min and max horizontal and vertical gaps (from NeighbourGaps)
193 // modified so that if the max exceeds the max dimension of the blob, and
194 // the min is less, the max is replaced with the min.
195 // The objective is to catch cases where there is only a single neighbour
196 // and avoid reporting the other gap as a ridiculously large number
MinMaxGapsClipped(int * h_min,int * h_max,int * v_min,int * v_max) const197 void BLOBNBOX::MinMaxGapsClipped(int *h_min, int *h_max, int *v_min, int *v_max) const {
198   int max_dimension = std::max(box.width(), box.height());
199   int gaps[BND_COUNT];
200   NeighbourGaps(gaps);
201   *h_min = std::min(gaps[BND_LEFT], gaps[BND_RIGHT]);
202   *h_max = std::max(gaps[BND_LEFT], gaps[BND_RIGHT]);
203   if (*h_max > max_dimension && *h_min < max_dimension) {
204     *h_max = *h_min;
205   }
206   *v_min = std::min(gaps[BND_ABOVE], gaps[BND_BELOW]);
207   *v_max = std::max(gaps[BND_ABOVE], gaps[BND_BELOW]);
208   if (*v_max > max_dimension && *v_min < max_dimension) {
209     *v_max = *v_min;
210   }
211 }
212 
213 // Nulls out any neighbours that are DeletableNoise to remove references.
CleanNeighbours()214 void BLOBNBOX::CleanNeighbours() {
215   for (int dir = 0; dir < BND_COUNT; ++dir) {
216     BLOBNBOX *neighbour = neighbours_[dir];
217     if (neighbour != nullptr && neighbour->DeletableNoise()) {
218       neighbours_[dir] = nullptr;
219       good_stroke_neighbours_[dir] = false;
220     }
221   }
222 }
223 
224 // Returns positive if there is at least one side neighbour that has a similar
225 // stroke width and is not on the other side of a rule line.
GoodTextBlob() const226 int BLOBNBOX::GoodTextBlob() const {
227   int score = 0;
228   for (int dir = 0; dir < BND_COUNT; ++dir) {
229     auto bnd = static_cast<BlobNeighbourDir>(dir);
230     if (good_stroke_neighbour(bnd)) {
231       ++score;
232     }
233   }
234   return score;
235 }
236 
237 // Returns the number of side neighbours that are of type BRT_NOISE.
NoisyNeighbours() const238 int BLOBNBOX::NoisyNeighbours() const {
239   int count = 0;
240   for (int dir = 0; dir < BND_COUNT; ++dir) {
241     auto bnd = static_cast<BlobNeighbourDir>(dir);
242     BLOBNBOX *blob = neighbour(bnd);
243     if (blob != nullptr && blob->region_type() == BRT_NOISE) {
244       ++count;
245     }
246   }
247   return count;
248 }
249 
250 // Returns true, and sets vert_possible/horz_possible if the blob has some
251 // feature that makes it individually appear to flow one way.
252 // eg if it has a high aspect ratio, yet has a complex shape, such as a
253 // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1 etc.
DefiniteIndividualFlow()254 bool BLOBNBOX::DefiniteIndividualFlow() {
255   if (cblob() == nullptr) {
256     return false;
257   }
258   int box_perimeter = 2 * (box.height() + box.width());
259   if (box.width() > box.height() * kDefiniteAspectRatio) {
260     // Attempt to distinguish a wide joined word from a dash.
261     // If it is a dash, then its perimeter is approximately
262     // 2 * (box width + stroke width), but more if the outline is noisy,
263     // so perimeter - 2*(box width + stroke width) should be close to zero.
264     // A complex shape such as a joined word should have a much larger value.
265     int perimeter = cblob()->perimeter();
266     if (vert_stroke_width() > 0 || perimeter <= 0) {
267       perimeter -= 2 * vert_stroke_width();
268     } else {
269       perimeter -= 4 * cblob()->area() / perimeter;
270     }
271     perimeter -= 2 * box.width();
272     // Use a multiple of the box perimeter as a threshold.
273     if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
274       set_vert_possible(false);
275       set_horz_possible(true);
276       return true;
277     }
278   }
279   if (box.height() > box.width() * kDefiniteAspectRatio) {
280     // As above, but for a putative vertical word vs a I/1/l.
281     int perimeter = cblob()->perimeter();
282     if (horz_stroke_width() > 0 || perimeter <= 0) {
283       perimeter -= 2 * horz_stroke_width();
284     } else {
285       perimeter -= 4 * cblob()->area() / perimeter;
286     }
287     perimeter -= 2 * box.height();
288     if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
289       set_vert_possible(true);
290       set_horz_possible(false);
291       return true;
292     }
293   }
294   return false;
295 }
296 
297 // Returns true if there is no tabstop violation in merging this and other.
ConfirmNoTabViolation(const BLOBNBOX & other) const298 bool BLOBNBOX::ConfirmNoTabViolation(const BLOBNBOX &other) const {
299   if (box.left() < other.box.left() && box.left() < other.left_rule_) {
300     return false;
301   }
302   if (other.box.left() < box.left() && other.box.left() < left_rule_) {
303     return false;
304   }
305   if (box.right() > other.box.right() && box.right() > other.right_rule_) {
306     return false;
307   }
308   if (other.box.right() > box.right() && other.box.right() > right_rule_) {
309     return false;
310   }
311   return true;
312 }
313 
314 // Returns true if other has a similar stroke width to this.
MatchingStrokeWidth(const BLOBNBOX & other,double fractional_tolerance,double constant_tolerance) const315 bool BLOBNBOX::MatchingStrokeWidth(const BLOBNBOX &other, double fractional_tolerance,
316                                    double constant_tolerance) const {
317   // The perimeter-based width is used as a backup in case there is
318   // no information in the blob.
319   double p_width = area_stroke_width();
320   double n_p_width = other.area_stroke_width();
321   float h_tolerance = horz_stroke_width_ * fractional_tolerance + constant_tolerance;
322   float v_tolerance = vert_stroke_width_ * fractional_tolerance + constant_tolerance;
323   double p_tolerance = p_width * fractional_tolerance + constant_tolerance;
324   bool h_zero = horz_stroke_width_ == 0.0f || other.horz_stroke_width_ == 0.0f;
325   bool v_zero = vert_stroke_width_ == 0.0f || other.vert_stroke_width_ == 0.0f;
326   bool h_ok = !h_zero && NearlyEqual(horz_stroke_width_, other.horz_stroke_width_, h_tolerance);
327   bool v_ok = !v_zero && NearlyEqual(vert_stroke_width_, other.vert_stroke_width_, v_tolerance);
328   bool p_ok = h_zero && v_zero && NearlyEqual(p_width, n_p_width, p_tolerance);
329   // For a match, at least one of the horizontal and vertical widths
330   // must match, and the other one must either match or be zero.
331   // Only if both are zero will we look at the perimeter metric.
332   return p_ok || ((v_ok || h_ok) && (h_ok || h_zero) && (v_ok || v_zero));
333 }
334 
335 // Returns a bounding box of the outline contained within the
336 // given horizontal range.
BoundsWithinLimits(int left,int right)337 TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) {
338   FCOORD no_rotation(1.0f, 0.0f);
339   float top = box.top();
340   float bottom = box.bottom();
341   if (cblob_ptr != nullptr) {
342     find_cblob_limits(cblob_ptr, static_cast<float>(left), static_cast<float>(right), no_rotation,
343                       bottom, top);
344   }
345 
346   if (top < bottom) {
347     top = box.top();
348     bottom = box.bottom();
349   }
350   FCOORD bot_left(left, bottom);
351   FCOORD top_right(right, top);
352   TBOX shrunken_box(bot_left);
353   TBOX shrunken_box2(top_right);
354   shrunken_box += shrunken_box2;
355   return shrunken_box;
356 }
357 
358 // Estimates and stores the baseline position based on the shape of the
359 // outline.
EstimateBaselinePosition()360 void BLOBNBOX::EstimateBaselinePosition() {
361   baseline_y_ = box.bottom(); // The default.
362   if (cblob_ptr == nullptr) {
363     return;
364   }
365   baseline_y_ = cblob_ptr->EstimateBaselinePosition();
366 }
367 
368 // Helper to call CleanNeighbours on all blobs on the list.
CleanNeighbours(BLOBNBOX_LIST * blobs)369 void BLOBNBOX::CleanNeighbours(BLOBNBOX_LIST *blobs) {
370   BLOBNBOX_IT blob_it(blobs);
371   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
372     blob_it.data()->CleanNeighbours();
373   }
374 }
375 
376 // Helper to delete all the deletable blobs on the list.
DeleteNoiseBlobs(BLOBNBOX_LIST * blobs)377 void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST *blobs) {
378   BLOBNBOX_IT blob_it(blobs);
379   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
380     BLOBNBOX *blob = blob_it.data();
381     if (blob->DeletableNoise()) {
382       delete blob->remove_cblob();
383       delete blob_it.extract();
384     }
385   }
386 }
387 
388 // Helper to compute edge offsets for  all the blobs on the list.
389 // See coutln.h for an explanation of edge offsets.
ComputeEdgeOffsets(Image thresholds,Image grey,BLOBNBOX_LIST * blobs)390 void BLOBNBOX::ComputeEdgeOffsets(Image thresholds, Image grey, BLOBNBOX_LIST *blobs) {
391   int grey_height = 0;
392   int thr_height = 0;
393   int scale_factor = 1;
394   if (thresholds != nullptr && grey != nullptr) {
395     grey_height = pixGetHeight(grey);
396     thr_height = pixGetHeight(thresholds);
397     scale_factor = IntCastRounded(static_cast<double>(grey_height) / thr_height);
398   }
399   BLOBNBOX_IT blob_it(blobs);
400   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
401     BLOBNBOX *blob = blob_it.data();
402     if (blob->cblob() != nullptr) {
403       // Get the threshold that applies to this blob.
404       l_uint32 threshold = 128;
405       if (thresholds != nullptr && grey != nullptr) {
406         const TBOX &box = blob->cblob()->bounding_box();
407         // Transform the coordinates if required.
408         TPOINT pt((box.left() + box.right()) / 2, (box.top() + box.bottom()) / 2);
409         pixGetPixel(thresholds, pt.x / scale_factor, thr_height - 1 - pt.y / scale_factor,
410                     &threshold);
411       }
412       blob->cblob()->ComputeEdgeOffsets(threshold, grey);
413     }
414   }
415 }
416 
417 #ifndef GRAPHICS_DISABLED
418 // Helper to draw all the blobs on the list in the given body_colour,
419 // with child outlines in the child_colour.
PlotBlobs(BLOBNBOX_LIST * list,ScrollView::Color body_colour,ScrollView::Color child_colour,ScrollView * win)420 void BLOBNBOX::PlotBlobs(BLOBNBOX_LIST *list, ScrollView::Color body_colour,
421                          ScrollView::Color child_colour, ScrollView *win) {
422   BLOBNBOX_IT it(list);
423   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
424     it.data()->plot(win, body_colour, child_colour);
425   }
426 }
427 
428 // Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the
429 // given list in the given body_colour, with child outlines in the
430 // child_colour.
PlotNoiseBlobs(BLOBNBOX_LIST * list,ScrollView::Color body_colour,ScrollView::Color child_colour,ScrollView * win)431 void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST *list, ScrollView::Color body_colour,
432                               ScrollView::Color child_colour, ScrollView *win) {
433   BLOBNBOX_IT it(list);
434   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
435     BLOBNBOX *blob = it.data();
436     if (blob->DeletableNoise()) {
437       blob->plot(win, body_colour, child_colour);
438     }
439   }
440 }
441 
TextlineColor(BlobRegionType region_type,BlobTextFlowType flow_type)442 ScrollView::Color BLOBNBOX::TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type) {
443   switch (region_type) {
444     case BRT_HLINE:
445       return ScrollView::BROWN;
446     case BRT_VLINE:
447       return ScrollView::DARK_GREEN;
448     case BRT_RECTIMAGE:
449       return ScrollView::RED;
450     case BRT_POLYIMAGE:
451       return ScrollView::ORANGE;
452     case BRT_UNKNOWN:
453       return flow_type == BTFT_NONTEXT ? ScrollView::CYAN : ScrollView::WHITE;
454     case BRT_VERT_TEXT:
455       if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE) {
456         return ScrollView::GREEN;
457       }
458       if (flow_type == BTFT_CHAIN) {
459         return ScrollView::LIME_GREEN;
460       }
461       return ScrollView::YELLOW;
462     case BRT_TEXT:
463       if (flow_type == BTFT_STRONG_CHAIN) {
464         return ScrollView::BLUE;
465       }
466       if (flow_type == BTFT_TEXT_ON_IMAGE) {
467         return ScrollView::LIGHT_BLUE;
468       }
469       if (flow_type == BTFT_CHAIN) {
470         return ScrollView::MEDIUM_BLUE;
471       }
472       if (flow_type == BTFT_LEADER) {
473         return ScrollView::WHEAT;
474       }
475       if (flow_type == BTFT_NONTEXT) {
476         return ScrollView::PINK;
477       }
478       return ScrollView::MAGENTA;
479     default:
480       return ScrollView::GREY;
481   }
482 }
483 
484 // Keep in sync with BlobRegionType.
BoxColor() const485 ScrollView::Color BLOBNBOX::BoxColor() const {
486   return TextlineColor(region_type_, flow_);
487 }
488 
plot(ScrollView * window,ScrollView::Color blob_colour,ScrollView::Color child_colour)489 void BLOBNBOX::plot(ScrollView *window,               // window to draw in
490                     ScrollView::Color blob_colour,    // for outer bits
491                     ScrollView::Color child_colour) { // for holes
492   if (cblob_ptr != nullptr) {
493     cblob_ptr->plot(window, blob_colour, child_colour);
494   }
495 }
496 #endif
497 /**********************************************************************
498  * find_cblob_limits
499  *
500  * Scan the outlines of the cblob to locate the y min and max
501  * between the given x limits.
502  **********************************************************************/
503 
find_cblob_limits(C_BLOB * blob,float leftx,float rightx,FCOORD rotation,float & ymin,float & ymax)504 void find_cblob_limits( // get y limits
505     C_BLOB *blob,       // blob to search
506     float leftx,        // x limits
507     float rightx,
508     FCOORD rotation, // for landscape
509     float &ymin,     // output y limits
510     float &ymax) {
511   int16_t stepindex;  // current point
512   ICOORD pos;         // current coords
513   ICOORD vec;         // rotated step
514   C_OUTLINE *outline; // current outline
515                       // outlines
516   C_OUTLINE_IT out_it = blob->out_list();
517 
518   ymin = static_cast<float>(INT32_MAX);
519   ymax = static_cast<float>(-INT32_MAX);
520   for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
521     outline = out_it.data();
522     pos = outline->start_pos(); // get coords
523     pos.rotate(rotation);
524     for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) {
525       // inside
526       if (pos.x() >= leftx && pos.x() <= rightx) {
527         UpdateRange(pos.y(), &ymin, &ymax);
528       }
529       vec = outline->step(stepindex);
530       vec.rotate(rotation);
531       pos += vec; // move to next
532     }
533   }
534 }
535 
536 /**********************************************************************
537  * find_cblob_vlimits
538  *
539  * Scan the outlines of the cblob to locate the y min and max
540  * between the given x limits.
541  **********************************************************************/
542 
find_cblob_vlimits(C_BLOB * blob,float leftx,float rightx,float & ymin,float & ymax)543 void find_cblob_vlimits( // get y limits
544     C_BLOB *blob,        // blob to search
545     float leftx,         // x limits
546     float rightx,
547     float &ymin, // output y limits
548     float &ymax) {
549   int16_t stepindex;  // current point
550   ICOORD pos;         // current coords
551   ICOORD vec;         // rotated step
552   C_OUTLINE *outline; // current outline
553                       // outlines
554   C_OUTLINE_IT out_it = blob->out_list();
555 
556   ymin = static_cast<float>(INT32_MAX);
557   ymax = static_cast<float>(-INT32_MAX);
558   for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
559     outline = out_it.data();
560     pos = outline->start_pos(); // get coords
561     for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) {
562       // inside
563       if (pos.x() >= leftx && pos.x() <= rightx) {
564         UpdateRange(pos.y(), &ymin, &ymax);
565       }
566       vec = outline->step(stepindex);
567       pos += vec; // move to next
568     }
569   }
570 }
571 
572 /**********************************************************************
573  * find_cblob_hlimits
574  *
575  * Scan the outlines of the cblob to locate the x min and max
576  * between the given y limits.
577  **********************************************************************/
578 
find_cblob_hlimits(C_BLOB * blob,float bottomy,float topy,float & xmin,float & xmax)579 void find_cblob_hlimits( // get x limits
580     C_BLOB *blob,        // blob to search
581     float bottomy,       // y limits
582     float topy,
583     float &xmin, // output x limits
584     float &xmax) {
585   int16_t stepindex;  // current point
586   ICOORD pos;         // current coords
587   ICOORD vec;         // rotated step
588   C_OUTLINE *outline; // current outline
589                       // outlines
590   C_OUTLINE_IT out_it = blob->out_list();
591 
592   xmin = static_cast<float>(INT32_MAX);
593   xmax = static_cast<float>(-INT32_MAX);
594   for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
595     outline = out_it.data();
596     pos = outline->start_pos(); // get coords
597     for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) {
598       // inside
599       if (pos.y() >= bottomy && pos.y() <= topy) {
600         UpdateRange(pos.x(), &xmin, &xmax);
601       }
602       vec = outline->step(stepindex);
603       pos += vec; // move to next
604     }
605   }
606 }
607 
608 /**********************************************************************
609  * crotate_cblob
610  *
611  * Rotate the copy by the given vector and return a C_BLOB.
612  **********************************************************************/
613 
crotate_cblob(C_BLOB * blob,FCOORD rotation)614 C_BLOB *crotate_cblob( // rotate it
615     C_BLOB *blob,      // blob to search
616     FCOORD rotation    // for landscape
617 ) {
618   C_OUTLINE_LIST out_list; // output outlines
619                            // input outlines
620   C_OUTLINE_IT in_it = blob->out_list();
621   // output outlines
622   C_OUTLINE_IT out_it = &out_list;
623 
624   for (in_it.mark_cycle_pt(); !in_it.cycled_list(); in_it.forward()) {
625     out_it.add_after_then_move(new C_OUTLINE(in_it.data(), rotation));
626   }
627   return new C_BLOB(&out_list);
628 }
629 
630 /**********************************************************************
631  * box_next
632  *
633  * Compute the bounding box of this blob with merging of x overlaps
634  * but no pre-chopping.
635  * Then move the iterator on to the start of the next blob.
636  **********************************************************************/
637 
box_next(BLOBNBOX_IT * it)638 TBOX box_next(      // get bounding box
639     BLOBNBOX_IT *it // iterator to blobds
640 ) {
641   BLOBNBOX *blob; // current blob
642   TBOX result;    // total box
643 
644   blob = it->data();
645   result = blob->bounding_box();
646   do {
647     it->forward();
648     blob = it->data();
649     if (blob->cblob() == nullptr) {
650       // was pre-chopped
651       result += blob->bounding_box();
652     }
653   }
654   // until next real blob
655   while ((blob->cblob() == nullptr) || blob->joined_to_prev());
656   return result;
657 }
658 
659 /**********************************************************************
660  * box_next_pre_chopped
661  *
662  * Compute the bounding box of this blob with merging of x overlaps
663  * but WITH pre-chopping.
664  * Then move the iterator on to the start of the next pre-chopped blob.
665  **********************************************************************/
666 
box_next_pre_chopped(BLOBNBOX_IT * it)667 TBOX box_next_pre_chopped( // get bounding box
668     BLOBNBOX_IT *it        // iterator to blobds
669 ) {
670   BLOBNBOX *blob; // current blob
671   TBOX result;    // total box
672 
673   blob = it->data();
674   result = blob->bounding_box();
675   do {
676     it->forward();
677     blob = it->data();
678   }
679   // until next real blob
680   while (blob->joined_to_prev());
681   return result;
682 }
683 
684 /**********************************************************************
685  * TO_ROW::TO_ROW
686  *
687  * Constructor to make a row from a blob.
688  **********************************************************************/
689 
TO_ROW(BLOBNBOX * blob,float top,float bottom,float row_size)690 TO_ROW::TO_ROW(     // constructor
691     BLOBNBOX *blob, // first blob
692     float top,      // corrected top
693     float bottom,   // of row
694     float row_size  // ideal
695 ) {
696   clear();
697   y_min = bottom;
698   y_max = top;
699   initial_y_min = bottom;
700 
701   float diff;              // in size
702   BLOBNBOX_IT it = &blobs; // list of blobs
703 
704   it.add_to_end(blob);
705   diff = top - bottom - row_size;
706   if (diff > 0) {
707     y_max -= diff / 2;
708     y_min += diff / 2;
709   }
710   // very small object
711   else if ((top - bottom) * 3 < row_size) {
712     diff = row_size / 3 + bottom - top;
713     y_max += diff / 2;
714     y_min -= diff / 2;
715   }
716 }
717 
print() const718 void TO_ROW::print() const {
719   tprintf(
720       "pitch=%d, fp=%g, fps=%g, fpns=%g, prs=%g, prns=%g,"
721       " spacing=%g xh=%g y_origin=%g xev=%d, asc=%g, desc=%g,"
722       " body=%g, minsp=%d maxnsp=%d, thr=%d kern=%g sp=%g\n",
723       pitch_decision, fixed_pitch, fp_space, fp_nonsp, pr_space, pr_nonsp, spacing, xheight,
724       y_origin, xheight_evidence, ascrise, descdrop, body_size, min_space, max_nonspace,
725       space_threshold, kern_size, space_size);
726 }
727 
728 /**********************************************************************
729  * TO_ROW:add_blob
730  *
731  * Add the blob to the end of the row.
732  **********************************************************************/
733 
add_blob(BLOBNBOX * blob,float top,float bottom,float row_size)734 void TO_ROW::add_blob( // constructor
735     BLOBNBOX *blob,    // first blob
736     float top,         // corrected top
737     float bottom,      // of row
738     float row_size     // ideal
739 ) {
740   float allowed;           // allowed expansion
741   float available;         // expansion
742   BLOBNBOX_IT it = &blobs; // list of blobs
743 
744   it.add_to_end(blob);
745   allowed = row_size + y_min - y_max;
746   if (allowed > 0) {
747     available = top > y_max ? top - y_max : 0;
748     if (bottom < y_min) {
749       // total available
750       available += y_min - bottom;
751     }
752     if (available > 0) {
753       available += available; // do it gradually
754       if (available < allowed) {
755         available = allowed;
756       }
757       if (bottom < y_min) {
758         y_min -= (y_min - bottom) * allowed / available;
759       }
760       if (top > y_max) {
761         y_max += (top - y_max) * allowed / available;
762       }
763     }
764   }
765 }
766 
767 /**********************************************************************
768  * TO_ROW:insert_blob
769  *
770  * Add the blob to the row in the correct position.
771  **********************************************************************/
772 
insert_blob(BLOBNBOX * blob)773 void TO_ROW::insert_blob( // constructor
774     BLOBNBOX *blob        // first blob
775 ) {
776   BLOBNBOX_IT it = &blobs; // list of blobs
777 
778   if (it.empty()) {
779     it.add_before_then_move(blob);
780   } else {
781     it.mark_cycle_pt();
782     while (!it.cycled_list() && it.data()->bounding_box().left() <= blob->bounding_box().left()) {
783       it.forward();
784     }
785     if (it.cycled_list()) {
786       it.add_to_end(blob);
787     } else {
788       it.add_before_stay_put(blob);
789     }
790   }
791 }
792 
793 /**********************************************************************
794  * TO_ROW::compute_vertical_projection
795  *
796  * Compute the vertical projection of a TO_ROW from its blobs.
797  **********************************************************************/
798 
compute_vertical_projection()799 void TO_ROW::compute_vertical_projection() { // project whole row
800   TBOX row_box;                              // bound of row
801   BLOBNBOX *blob;                            // current blob
802   TBOX blob_box;                             // bounding box
803   BLOBNBOX_IT blob_it = blob_list();
804 
805   if (blob_it.empty()) {
806     return;
807   }
808   row_box = blob_it.data()->bounding_box();
809   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
810     row_box += blob_it.data()->bounding_box();
811   }
812 
813   projection.set_range(row_box.left() - PROJECTION_MARGIN, row_box.right() + PROJECTION_MARGIN);
814   projection_left = row_box.left() - PROJECTION_MARGIN;
815   projection_right = row_box.right() + PROJECTION_MARGIN;
816   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
817     blob = blob_it.data();
818     if (blob->cblob() != nullptr) {
819       vertical_cblob_projection(blob->cblob(), &projection);
820     }
821   }
822 }
823 
824 /**********************************************************************
825  * TO_ROW::clear
826  *
827  * Zero out all scalar members.
828  **********************************************************************/
clear()829 void TO_ROW::clear() {
830   all_caps = false;
831   used_dm_model = false;
832   projection_left = 0;
833   projection_right = 0;
834   pitch_decision = PITCH_DUNNO;
835   fixed_pitch = 0.0;
836   fp_space = 0.0;
837   fp_nonsp = 0.0;
838   pr_space = 0.0;
839   pr_nonsp = 0.0;
840   spacing = 0.0;
841   xheight = 0.0;
842   xheight_evidence = 0;
843   body_size = 0.0;
844   ascrise = 0.0;
845   descdrop = 0.0;
846   min_space = 0;
847   max_nonspace = 0;
848   space_threshold = 0;
849   kern_size = 0.0;
850   space_size = 0.0;
851   y_min = 0.0;
852   y_max = 0.0;
853   initial_y_min = 0.0;
854   m = 0.0;
855   c = 0.0;
856   error = 0.0;
857   para_c = 0.0;
858   para_error = 0.0;
859   y_origin = 0.0;
860   credibility = 0.0;
861   num_repeated_sets_ = -1;
862 }
863 
864 /**********************************************************************
865  * vertical_cblob_projection
866  *
867  * Compute the vertical projection of a cblob from its outlines
868  * and add to the given STATS.
869  **********************************************************************/
870 
vertical_cblob_projection(C_BLOB * blob,STATS * stats)871 void vertical_cblob_projection( // project outlines
872     C_BLOB *blob,               // blob to project
873     STATS *stats                // output
874 ) {
875   // outlines of blob
876   C_OUTLINE_IT out_it = blob->out_list();
877 
878   for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
879     vertical_coutline_projection(out_it.data(), stats);
880   }
881 }
882 
883 /**********************************************************************
884  * vertical_coutline_projection
885  *
886  * Compute the vertical projection of a outline from its outlines
887  * and add to the given STATS.
888  **********************************************************************/
889 
vertical_coutline_projection(C_OUTLINE * outline,STATS * stats)890 void vertical_coutline_projection( // project outlines
891     C_OUTLINE *outline,            // outline to project
892     STATS *stats                   // output
893 ) {
894   ICOORD pos;        // current point
895   ICOORD step;       // edge step
896   int32_t length;    // of outline
897   int16_t stepindex; // current step
898   C_OUTLINE_IT out_it = outline->child();
899 
900   pos = outline->start_pos();
901   length = outline->pathlength();
902   for (stepindex = 0; stepindex < length; stepindex++) {
903     step = outline->step(stepindex);
904     if (step.x() > 0) {
905       stats->add(pos.x(), -pos.y());
906     } else if (step.x() < 0) {
907       stats->add(pos.x() - 1, pos.y());
908     }
909     pos += step;
910   }
911 
912   for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
913     vertical_coutline_projection(out_it.data(), stats);
914   }
915 }
916 
917 /**********************************************************************
918  * TO_BLOCK::TO_BLOCK
919  *
920  * Constructor to make a TO_BLOCK from a real block.
921  **********************************************************************/
922 
TO_BLOCK(BLOCK * src_block)923 TO_BLOCK::TO_BLOCK(  // make a block
924     BLOCK *src_block // real block
925 ) {
926   clear();
927   block = src_block;
928 }
929 
930 /**********************************************************************
931  * TO_BLOCK::clear
932  *
933  * Zero out all scalar members.
934  **********************************************************************/
clear()935 void TO_BLOCK::clear() {
936   block = nullptr;
937   pitch_decision = PITCH_DUNNO;
938   line_spacing = 0.0;
939   line_size = 0.0;
940   max_blob_size = 0.0;
941   baseline_offset = 0.0;
942   xheight = 0.0;
943   fixed_pitch = 0.0;
944   kern_size = 0.0;
945   space_size = 0.0;
946   min_space = 0;
947   max_nonspace = 0;
948   fp_space = 0.0;
949   fp_nonsp = 0.0;
950   pr_space = 0.0;
951   pr_nonsp = 0.0;
952   key_row = nullptr;
953 }
954 
~TO_BLOCK()955 TO_BLOCK::~TO_BLOCK() {
956   // Any residual BLOBNBOXes at this stage own their blobs, so delete them.
957   BLOBNBOX::clear_blobnboxes(&blobs);
958   BLOBNBOX::clear_blobnboxes(&underlines);
959   BLOBNBOX::clear_blobnboxes(&noise_blobs);
960   BLOBNBOX::clear_blobnboxes(&small_blobs);
961   BLOBNBOX::clear_blobnboxes(&large_blobs);
962 }
963 
964 // Helper function to divide the input blobs over noise, small, medium
965 // and large lists. Blobs small in height and (small in width or large in width)
966 // go in the noise list. Dash (-) candidates go in the small list, and
967 // medium and large are by height.
968 // SIDE-EFFECT: reset all blobs to initial state by calling Init().
SizeFilterBlobs(int min_height,int max_height,BLOBNBOX_LIST * src_list,BLOBNBOX_LIST * noise_list,BLOBNBOX_LIST * small_list,BLOBNBOX_LIST * medium_list,BLOBNBOX_LIST * large_list)969 static void SizeFilterBlobs(int min_height, int max_height, BLOBNBOX_LIST *src_list,
970                             BLOBNBOX_LIST *noise_list, BLOBNBOX_LIST *small_list,
971                             BLOBNBOX_LIST *medium_list, BLOBNBOX_LIST *large_list) {
972   BLOBNBOX_IT noise_it(noise_list);
973   BLOBNBOX_IT small_it(small_list);
974   BLOBNBOX_IT medium_it(medium_list);
975   BLOBNBOX_IT large_it(large_list);
976   for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) {
977     BLOBNBOX *blob = src_it.extract();
978     blob->ReInit();
979     int width = blob->bounding_box().width();
980     int height = blob->bounding_box().height();
981     if (height < min_height && (width < min_height || width > max_height)) {
982       noise_it.add_after_then_move(blob);
983     } else if (height > max_height) {
984       large_it.add_after_then_move(blob);
985     } else if (height < min_height) {
986       small_it.add_after_then_move(blob);
987     } else {
988       medium_it.add_after_then_move(blob);
989     }
990   }
991 }
992 
993 // Reorganize the blob lists with a different definition of small, medium
994 // and large, compared to the original definition.
995 // Height is still the primary filter key, but medium width blobs of small
996 // height become small, and very wide blobs of small height stay noise, along
997 // with small dot-shaped blobs.
ReSetAndReFilterBlobs()998 void TO_BLOCK::ReSetAndReFilterBlobs() {
999   int min_height = IntCastRounded(kMinMediumSizeRatio * line_size);
1000   int max_height = IntCastRounded(kMaxMediumSizeRatio * line_size);
1001   BLOBNBOX_LIST noise_list;
1002   BLOBNBOX_LIST small_list;
1003   BLOBNBOX_LIST medium_list;
1004   BLOBNBOX_LIST large_list;
1005   SizeFilterBlobs(min_height, max_height, &blobs, &noise_list, &small_list, &medium_list,
1006                   &large_list);
1007   SizeFilterBlobs(min_height, max_height, &large_blobs, &noise_list, &small_list, &medium_list,
1008                   &large_list);
1009   SizeFilterBlobs(min_height, max_height, &small_blobs, &noise_list, &small_list, &medium_list,
1010                   &large_list);
1011   SizeFilterBlobs(min_height, max_height, &noise_blobs, &noise_list, &small_list, &medium_list,
1012                   &large_list);
1013   BLOBNBOX_IT blob_it(&blobs);
1014   blob_it.add_list_after(&medium_list);
1015   blob_it.set_to_list(&large_blobs);
1016   blob_it.add_list_after(&large_list);
1017   blob_it.set_to_list(&small_blobs);
1018   blob_it.add_list_after(&small_list);
1019   blob_it.set_to_list(&noise_blobs);
1020   blob_it.add_list_after(&noise_list);
1021 }
1022 
1023 // Deletes noise blobs from all lists where not owned by a ColPartition.
DeleteUnownedNoise()1024 void TO_BLOCK::DeleteUnownedNoise() {
1025   BLOBNBOX::CleanNeighbours(&blobs);
1026   BLOBNBOX::CleanNeighbours(&small_blobs);
1027   BLOBNBOX::CleanNeighbours(&noise_blobs);
1028   BLOBNBOX::CleanNeighbours(&large_blobs);
1029   BLOBNBOX::DeleteNoiseBlobs(&blobs);
1030   BLOBNBOX::DeleteNoiseBlobs(&small_blobs);
1031   BLOBNBOX::DeleteNoiseBlobs(&noise_blobs);
1032   BLOBNBOX::DeleteNoiseBlobs(&large_blobs);
1033 }
1034 
1035 // Computes and stores the edge offsets on each blob for use in feature
1036 // extraction, using greyscale if the supplied grey and thresholds pixes
1037 // are 8-bit or otherwise (if nullptr or not 8 bit) the original binary
1038 // edge step outlines.
1039 // Thresholds must either be the same size as grey or an integer down-scale
1040 // of grey.
1041 // See coutln.h for an explanation of edge offsets.
ComputeEdgeOffsets(Image thresholds,Image grey)1042 void TO_BLOCK::ComputeEdgeOffsets(Image thresholds, Image grey) {
1043   BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &blobs);
1044   BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &small_blobs);
1045   BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &noise_blobs);
1046 }
1047 
1048 #ifndef GRAPHICS_DISABLED
1049 // Draw the noise blobs from all lists in red.
plot_noise_blobs(ScrollView * win)1050 void TO_BLOCK::plot_noise_blobs(ScrollView *win) {
1051   BLOBNBOX::PlotNoiseBlobs(&noise_blobs, ScrollView::RED, ScrollView::RED, win);
1052   BLOBNBOX::PlotNoiseBlobs(&small_blobs, ScrollView::RED, ScrollView::RED, win);
1053   BLOBNBOX::PlotNoiseBlobs(&large_blobs, ScrollView::RED, ScrollView::RED, win);
1054   BLOBNBOX::PlotNoiseBlobs(&blobs, ScrollView::RED, ScrollView::RED, win);
1055 }
1056 
1057 // Draw the blobs on the various lists in the block in different colors.
plot_graded_blobs(ScrollView * win)1058 void TO_BLOCK::plot_graded_blobs(ScrollView *win) {
1059   BLOBNBOX::PlotBlobs(&noise_blobs, ScrollView::CORAL, ScrollView::BLUE, win);
1060   BLOBNBOX::PlotBlobs(&small_blobs, ScrollView::GOLDENROD, ScrollView::YELLOW, win);
1061   BLOBNBOX::PlotBlobs(&large_blobs, ScrollView::DARK_GREEN, ScrollView::YELLOW, win);
1062   BLOBNBOX::PlotBlobs(&blobs, ScrollView::WHITE, ScrollView::BROWN, win);
1063 }
1064 
1065 /**********************************************************************
1066  * plot_blob_list
1067  *
1068  * Draw a list of blobs.
1069  **********************************************************************/
1070 
plot_blob_list(ScrollView * win,BLOBNBOX_LIST * list,ScrollView::Color body_colour,ScrollView::Color child_colour)1071 void plot_blob_list(ScrollView *win,                  // window to draw in
1072                     BLOBNBOX_LIST *list,              // blob list
1073                     ScrollView::Color body_colour,    // colour to draw
1074                     ScrollView::Color child_colour) { // colour of child
1075   BLOBNBOX_IT it = list;
1076   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1077     it.data()->plot(win, body_colour, child_colour);
1078   }
1079 }
1080 #endif // !GRAPHICS_DISABLED
1081 
1082 } // namespace tesseract
1083