1 /**********************************************************************
2  * File:        stepblob.cpp  (Formerly cblob.c)
3  * Description: Code for C_BLOB class.
4  * Author:      Ray Smith
5  *
6  * (C) Copyright 1991, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 // Include automatically generated configuration file if running autoconf.
20 #ifdef HAVE_CONFIG_H
21 #  include "config_auto.h"
22 #endif
23 
24 #include "stepblob.h"
25 
26 #include "points.h" // for operator+=, FCOORD, ICOORD
27 
28 #include <allheaders.h> // for pixCreate, pixGetDepth
29 #include <vector>       // for std::vector
30 
31 namespace tesseract {
32 
33 class DENORM;
34 
35 // Max perimeter to width ratio for a baseline position above box bottom.
36 const double kMaxPerimeterWidthRatio = 8.0;
37 
38 /**********************************************************************
39  * position_outline
40  *
41  * Position the outline in the given list at the relevant place
42  * according to its nesting.
43  **********************************************************************/
position_outline(C_OUTLINE * outline,C_OUTLINE_LIST * destlist)44 static void position_outline( // put in place
45     C_OUTLINE *outline,       // thing to place
46     C_OUTLINE_LIST *destlist  // desstination list
47 ) {
48   C_OUTLINE_IT it = destlist; // iterator
49                               // iterator on children
50   C_OUTLINE_IT child_it = outline->child();
51 
52   if (!it.empty()) {
53     do {
54       // outline from dest list
55       C_OUTLINE *dest_outline = it.data(); // get destination
56                                 // encloses dest
57       if (*dest_outline < *outline) {
58         // take off list
59         dest_outline = it.extract();
60         // put this in place
61         it.add_after_then_move(outline);
62         // make it a child
63         child_it.add_to_end(dest_outline);
64         while (!it.at_last()) {
65           it.forward(); // do rest of list
66                         // check for other children
67           dest_outline = it.data();
68           if (*dest_outline < *outline) {
69             // take off list
70             dest_outline = it.extract();
71             child_it.add_to_end(dest_outline);
72             // make it a child
73             if (it.empty()) {
74               break;
75             }
76           }
77         }
78         return; // finished
79       }
80       // enclosed by dest
81       else if (*outline < *dest_outline) {
82         position_outline(outline, dest_outline->child());
83         // place in child list
84         return; // finished
85       }
86       it.forward();
87     } while (!it.at_first());
88   }
89   it.add_to_end(outline); // at outer level
90 }
91 
92 /**********************************************************************
93  * plot_outline_list
94  *
95  * Draw a list of outlines in the given colour and their children
96  * in the child colour.
97  **********************************************************************/
98 
99 #ifndef GRAPHICS_DISABLED
plot_outline_list(C_OUTLINE_LIST * list,ScrollView * window,ScrollView::Color colour,ScrollView::Color child_colour)100 static void plot_outline_list(     // draw outlines
101     C_OUTLINE_LIST *list,          // outline to draw
102     ScrollView *window,            // window to draw in
103     ScrollView::Color colour,      // colour to use
104     ScrollView::Color child_colour // colour of children
105 ) {
106   C_OUTLINE *outline;     // current outline
107   C_OUTLINE_IT it = list; // iterator
108 
109   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
110     outline = it.data();
111     // draw it
112     outline->plot(window, colour);
113     if (!outline->child()->empty()) {
114       plot_outline_list(outline->child(), window, child_colour, child_colour);
115     }
116   }
117 }
118 // Draws the outlines in the given colour, and child_colour, normalized
119 // using the given denorm, making use of sub-pixel accurate information
120 // if available.
plot_normed_outline_list(const DENORM & denorm,C_OUTLINE_LIST * list,ScrollView::Color colour,ScrollView::Color child_colour,ScrollView * window)121 static void plot_normed_outline_list(const DENORM &denorm, C_OUTLINE_LIST *list,
122                                      ScrollView::Color colour, ScrollView::Color child_colour,
123                                      ScrollView *window) {
124   C_OUTLINE_IT it(list);
125   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
126     C_OUTLINE *outline = it.data();
127     outline->plot_normed(denorm, colour, window);
128     if (!outline->child()->empty()) {
129       plot_normed_outline_list(denorm, outline->child(), child_colour, child_colour, window);
130     }
131   }
132 }
133 #endif
134 
135 /**********************************************************************
136  * reverse_outline_list
137  *
138  * Reverse a list of outlines and their children.
139  **********************************************************************/
140 
reverse_outline_list(C_OUTLINE_LIST * list)141 static void reverse_outline_list(C_OUTLINE_LIST *list) {
142   C_OUTLINE_IT it = list; // iterator
143 
144   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
145     C_OUTLINE *outline = it.data();
146     outline->reverse(); // reverse it
147     outline->set_flag(COUT_INVERSE, true);
148     if (!outline->child()->empty()) {
149       reverse_outline_list(outline->child());
150     }
151   }
152 }
153 
154 /**********************************************************************
155  * C_BLOB::C_BLOB
156  *
157  * Constructor to build a C_BLOB from a list of C_OUTLINEs.
158  * The C_OUTLINEs are not copied so the source list is emptied.
159  * The C_OUTLINEs are nested correctly in the blob.
160  **********************************************************************/
161 
C_BLOB(C_OUTLINE_LIST * outline_list)162 C_BLOB::C_BLOB(C_OUTLINE_LIST *outline_list) {
163   for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) {
164     C_OUTLINE *outline = ol_it.extract();
165     // Position this outline in appropriate position in the hierarchy.
166     position_outline(outline, &outlines);
167   }
168   CheckInverseFlagAndDirection();
169 }
170 
171 // Simpler constructor to build a blob from a single outline that has
172 // already been fully initialized.
C_BLOB(C_OUTLINE * outline)173 C_BLOB::C_BLOB(C_OUTLINE *outline) {
174   C_OUTLINE_IT it(&outlines);
175   it.add_to_end(outline);
176 }
177 
178 // Builds a set of one or more blobs from a list of outlines.
179 // Input: one outline on outline_list contains all the others, but the
180 // nesting and order are undefined.
181 // If good_blob is true, the blob is added to good_blobs_it, unless
182 // an illegal (generation-skipping) parent-child relationship is found.
183 // If so, the parent blob goes to bad_blobs_it, and the immediate children
184 // are promoted to the top level, recursively being sent to good_blobs_it.
185 // If good_blob is false, all created blobs will go to the bad_blobs_it.
186 // Output: outline_list is empty. One or more blobs are added to
187 // good_blobs_it and/or bad_blobs_it.
ConstructBlobsFromOutlines(bool good_blob,C_OUTLINE_LIST * outline_list,C_BLOB_IT * good_blobs_it,C_BLOB_IT * bad_blobs_it)188 void C_BLOB::ConstructBlobsFromOutlines(bool good_blob, C_OUTLINE_LIST *outline_list,
189                                         C_BLOB_IT *good_blobs_it, C_BLOB_IT *bad_blobs_it) {
190   // List of top-level outlines with correctly nested children.
191   C_OUTLINE_LIST nested_outlines;
192   for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) {
193     C_OUTLINE *outline = ol_it.extract();
194     // Position this outline in appropriate position in the hierarchy.
195     position_outline(outline, &nested_outlines);
196   }
197   // Check for legal nesting and reassign as required.
198   for (C_OUTLINE_IT ol_it(&nested_outlines); !ol_it.empty(); ol_it.forward()) {
199     C_OUTLINE *outline = ol_it.extract();
200     bool blob_is_good = good_blob;
201     if (!outline->IsLegallyNested()) {
202       // The blob is illegally nested.
203       // Mark it bad, and add all its children to the top-level list.
204       blob_is_good = false;
205       ol_it.add_list_after(outline->child());
206     }
207     auto *blob = new C_BLOB(outline);
208     // Set inverse flag and reverse if needed.
209     blob->CheckInverseFlagAndDirection();
210     // Put on appropriate list.
211     if (!blob_is_good && bad_blobs_it != nullptr) {
212       bad_blobs_it->add_after_then_move(blob);
213     } else {
214       good_blobs_it->add_after_then_move(blob);
215     }
216   }
217 }
218 
219 // Sets the COUT_INVERSE flag appropriately on the outlines and their
220 // children recursively, reversing the outlines if needed so that
221 // everything has an anticlockwise top-level.
CheckInverseFlagAndDirection()222 void C_BLOB::CheckInverseFlagAndDirection() {
223   C_OUTLINE_IT ol_it(&outlines);
224   for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
225     C_OUTLINE *outline = ol_it.data();
226     if (outline->turn_direction() < 0) {
227       outline->reverse();
228       reverse_outline_list(outline->child());
229       outline->set_flag(COUT_INVERSE, true);
230     } else {
231       outline->set_flag(COUT_INVERSE, false);
232     }
233   }
234 }
235 
236 // Build and return a fake blob containing a single fake outline with no
237 // steps.
FakeBlob(const TBOX & box)238 C_BLOB *C_BLOB::FakeBlob(const TBOX &box) {
239   C_OUTLINE_LIST outlines;
240   C_OUTLINE::FakeOutline(box, &outlines);
241   return new C_BLOB(&outlines);
242 }
243 
244 /**********************************************************************
245  * C_BLOB::bounding_box
246  *
247  * Return the bounding box of the blob.
248  **********************************************************************/
249 
bounding_box() const250 TBOX C_BLOB::bounding_box() const { // bounding box
251   // This is a read-only iteration of the outlines.
252   C_OUTLINE_IT it = const_cast<C_OUTLINE_LIST *>(&outlines);
253   TBOX box; // bounding box
254 
255   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
256     C_OUTLINE *outline = it.data();
257     box += outline->bounding_box();
258   }
259   return box;
260 }
261 
262 /**********************************************************************
263  * C_BLOB::area
264  *
265  * Return the area of the blob.
266  **********************************************************************/
267 
area()268 int32_t C_BLOB::area() {       // area
269   C_OUTLINE_IT it = &outlines; // outlines of blob
270   int32_t total = 0;           // total area
271 
272   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
273     C_OUTLINE *outline = it.data();
274     total += outline->area();
275   }
276   return total;
277 }
278 
279 /**********************************************************************
280  * C_BLOB::perimeter
281  *
282  * Return the perimeter of the top and 2nd level outlines.
283  **********************************************************************/
284 
perimeter()285 int32_t C_BLOB::perimeter() {
286   C_OUTLINE_IT it = &outlines; // outlines of blob
287   int32_t total = 0;           // total perimeter
288 
289   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
290     C_OUTLINE *outline = it.data();
291     total += outline->perimeter();
292   }
293   return total;
294 }
295 
296 /**********************************************************************
297  * C_BLOB::outer_area
298  *
299  * Return the area of the blob.
300  **********************************************************************/
301 
outer_area()302 int32_t C_BLOB::outer_area() { // area
303   C_OUTLINE_IT it = &outlines; // outlines of blob
304   int32_t total = 0;           // total area
305 
306   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
307     C_OUTLINE *outline = it.data();
308     total += outline->outer_area();
309   }
310   return total;
311 }
312 
313 /**********************************************************************
314  * C_BLOB::count_transitions
315  *
316  * Return the total x and y maxes and mins in the blob.
317  * Chlid outlines are not counted.
318  **********************************************************************/
319 
count_transitions(int32_t threshold)320 int32_t C_BLOB::count_transitions( // area
321     int32_t threshold              // on size
322 ) {
323   C_OUTLINE_IT it = &outlines; // outlines of blob
324   int32_t total = 0;           // total area
325 
326   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
327     C_OUTLINE *outline = it.data();
328     total += outline->count_transitions(threshold);
329   }
330   return total;
331 }
332 
333 /**********************************************************************
334  * C_BLOB::move
335  *
336  * Move C_BLOB by vector
337  **********************************************************************/
338 
move(const ICOORD vec)339 void C_BLOB::move(   // reposition blob
340     const ICOORD vec // by vector
341 ) {
342   C_OUTLINE_IT it(&outlines); // iterator
343 
344   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
345     it.data()->move(vec); // move each outline
346   }
347 }
348 
349 // Static helper for C_BLOB::rotate to allow recursion of child outlines.
RotateOutlineList(const FCOORD & rotation,C_OUTLINE_LIST * outlines)350 static void RotateOutlineList(const FCOORD &rotation, C_OUTLINE_LIST *outlines) {
351   C_OUTLINE_LIST new_outlines;
352   C_OUTLINE_IT src_it(outlines);
353   C_OUTLINE_IT dest_it(&new_outlines);
354   while (!src_it.empty()) {
355     C_OUTLINE *old_outline = src_it.extract();
356     src_it.forward();
357     auto *new_outline = new C_OUTLINE(old_outline, rotation);
358     if (!old_outline->child()->empty()) {
359       RotateOutlineList(rotation, old_outline->child());
360       C_OUTLINE_IT child_it(new_outline->child());
361       child_it.add_list_after(old_outline->child());
362     }
363     delete old_outline;
364     dest_it.add_to_end(new_outline);
365   }
366   src_it.add_list_after(&new_outlines);
367 }
368 
369 /**********************************************************************
370  * C_BLOB::rotate
371  *
372  * Rotate C_BLOB by rotation.
373  * Warning! has to rebuild all the C_OUTLINEs.
374  **********************************************************************/
rotate(const FCOORD & rotation)375 void C_BLOB::rotate(const FCOORD &rotation) {
376   RotateOutlineList(rotation, &outlines);
377 }
378 
379 // Helper calls ComputeEdgeOffsets or ComputeBinaryOffsets recursively on the
380 // outline list and its children.
ComputeEdgeOffsetsOutlineList(int threshold,Image pix,C_OUTLINE_LIST * list)381 static void ComputeEdgeOffsetsOutlineList(int threshold, Image pix, C_OUTLINE_LIST *list) {
382   C_OUTLINE_IT it(list);
383   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
384     C_OUTLINE *outline = it.data();
385     if (pix != nullptr && pixGetDepth(pix) == 8) {
386       outline->ComputeEdgeOffsets(threshold, pix);
387     } else {
388       outline->ComputeBinaryOffsets();
389     }
390     if (!outline->child()->empty()) {
391       ComputeEdgeOffsetsOutlineList(threshold, pix, outline->child());
392     }
393   }
394 }
395 
396 // Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
397 // if the supplied pix is 8-bit or the binary edges if nullptr.
ComputeEdgeOffsets(int threshold,Image pix)398 void C_BLOB::ComputeEdgeOffsets(int threshold, Image pix) {
399   ComputeEdgeOffsetsOutlineList(threshold, pix, &outlines);
400 }
401 
402 // Estimates and returns the baseline position based on the shape of the
403 // outlines.
404 // We first find the minimum y-coord (y_mins) at each x-coord within the blob.
405 // If there is a run of some y or y+1 in y_mins that is longer than the total
406 // number of positions at bottom or bottom+1, subject to the additional
407 // condition that at least one side of the y/y+1 run is higher than y+1, so it
408 // is not a local minimum, then y, not the bottom, makes a good candidate
409 // baseline position for this blob. Eg
410 //   |                  ---|
411 //   |                  |
412 //   |-      -----------|        <=  Good candidate baseline position.
413 //    |-    -|
414 //     |   -|
415 //     |---|                     <=  Bottom of blob
EstimateBaselinePosition()416 int16_t C_BLOB::EstimateBaselinePosition() {
417   TBOX box = bounding_box();
418   int left = box.left();
419   int width = box.width();
420   int bottom = box.bottom();
421   if (outlines.empty() || perimeter() > width * kMaxPerimeterWidthRatio) {
422     return bottom; // This is only for non-CJK blobs.
423   }
424   // Get the minimum y coordinate at each x-coordinate.
425   std::vector<int> y_mins(width + 1, box.top());
426   C_OUTLINE_IT it(&outlines);
427   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
428     C_OUTLINE *outline = it.data();
429     ICOORD pos = outline->start_pos();
430     for (int s = 0; s < outline->pathlength(); ++s) {
431       if (pos.y() < y_mins[pos.x() - left]) {
432         y_mins[pos.x() - left] = pos.y();
433       }
434       pos += outline->step(s);
435     }
436   }
437   // Find the total extent of the bottom or bottom + 1.
438   int bottom_extent = 0;
439   for (int x = 0; x <= width; ++x) {
440     if (y_mins[x] == bottom || y_mins[x] == bottom + 1) {
441       ++bottom_extent;
442     }
443   }
444   // Find the lowest run longer than the bottom extent that is not the bottom.
445   int best_min = box.top();
446   int prev_run = 0;
447   int prev_y = box.top();
448   int prev_prev_y = box.top();
449   for (int x = 0; x < width; x += prev_run) {
450     // Find the length of the current run.
451     int y_at_x = y_mins[x];
452     int run = 1;
453     while (x + run <= width && y_mins[x + run] == y_at_x) {
454       ++run;
455     }
456     if (y_at_x > bottom + 1) {
457       // Possible contender.
458       int total_run = run;
459       // Find extent of current value or +1 to the right of x.
460       while (x + total_run <= width &&
461              (y_mins[x + total_run] == y_at_x || y_mins[x + total_run] == y_at_x + 1)) {
462         ++total_run;
463       }
464       // At least one end has to be higher so it is not a local max.
465       if (prev_prev_y > y_at_x + 1 || x + total_run > width || y_mins[x + total_run] > y_at_x + 1) {
466         // If the prev_run is at y + 1, then we can add that too. There cannot
467         // be a suitable run at y before that or we would have found it already.
468         if (prev_run > 0 && prev_y == y_at_x + 1) {
469           total_run += prev_run;
470         }
471         if (total_run > bottom_extent && y_at_x < best_min) {
472           best_min = y_at_x;
473         }
474       }
475     }
476     prev_run = run;
477     prev_prev_y = prev_y;
478     prev_y = y_at_x;
479   }
480   return best_min == box.top() ? bottom : best_min;
481 }
482 
render_outline_list(C_OUTLINE_LIST * list,int left,int top,Image pix)483 static void render_outline_list(C_OUTLINE_LIST *list, int left, int top, Image pix) {
484   C_OUTLINE_IT it(list);
485   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
486     C_OUTLINE *outline = it.data();
487     outline->render(left, top, pix);
488     if (!outline->child()->empty()) {
489       render_outline_list(outline->child(), left, top, pix);
490     }
491   }
492 }
493 
render_outline_list_outline(C_OUTLINE_LIST * list,int left,int top,Image pix)494 static void render_outline_list_outline(C_OUTLINE_LIST *list, int left, int top, Image pix) {
495   C_OUTLINE_IT it(list);
496   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
497     C_OUTLINE *outline = it.data();
498     outline->render_outline(left, top, pix);
499   }
500 }
501 
502 // Returns a Pix rendering of the blob. pixDestroy after use.
render()503 Image C_BLOB::render() {
504   TBOX box = bounding_box();
505   Image pix = pixCreate(box.width(), box.height(), 1);
506   render_outline_list(&outlines, box.left(), box.top(), pix);
507   return pix;
508 }
509 
510 // Returns a Pix rendering of the outline of the blob. (no fill).
511 // pixDestroy after use.
render_outline()512 Image C_BLOB::render_outline() {
513   TBOX box = bounding_box();
514   Image pix = pixCreate(box.width(), box.height(), 1);
515   render_outline_list_outline(&outlines, box.left(), box.top(), pix);
516   return pix;
517 }
518 
519 /**********************************************************************
520  * C_BLOB::plot
521  *
522  * Draw the C_BLOB in the given colour.
523  **********************************************************************/
524 
525 #ifndef GRAPHICS_DISABLED
plot(ScrollView * window,ScrollView::Color blob_colour,ScrollView::Color child_colour)526 void C_BLOB::plot(ScrollView *window,               // window to draw in
527                   ScrollView::Color blob_colour,    // main colour
528                   ScrollView::Color child_colour) { // for holes
529   plot_outline_list(&outlines, window, blob_colour, child_colour);
530 }
531 // Draws the blob in the given colour, and child_colour, normalized
532 // using the given denorm, making use of sub-pixel accurate information
533 // if available.
plot_normed(const DENORM & denorm,ScrollView::Color blob_colour,ScrollView::Color child_colour,ScrollView * window)534 void C_BLOB::plot_normed(const DENORM &denorm, ScrollView::Color blob_colour,
535                          ScrollView::Color child_colour, ScrollView *window) {
536   plot_normed_outline_list(denorm, &outlines, blob_colour, child_colour, window);
537 }
538 #endif
539 
540 } // namespace tesseract
541