1 ///////////////////////////////////////////////////////////////////////
2 // File:        colpartitionset.cpp
3 // Description: Class to hold a list of ColPartitions of the page that
4 //              correspond roughly to columns.
5 // Author:      Ray Smith
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19 
20 #ifdef HAVE_CONFIG_H
21 #  include "config_auto.h"
22 #endif
23 
24 #include "colpartitionset.h"
25 #include "tablefind.h"
26 #include "workingpartset.h"
27 
28 namespace tesseract {
29 
30 // Minimum width of a column to be interesting as a multiple of resolution.
31 const double kMinColumnWidth = 2.0 / 3;
32 
ColPartitionSet(ColPartition_LIST * partitions)33 ColPartitionSet::ColPartitionSet(ColPartition_LIST *partitions) {
34   ColPartition_IT it(&parts_);
35   it.add_list_after(partitions);
36   ComputeCoverage();
37 }
38 
ColPartitionSet(ColPartition * part)39 ColPartitionSet::ColPartitionSet(ColPartition *part) {
40   ColPartition_IT it(&parts_);
41   it.add_after_then_move(part);
42   ComputeCoverage();
43 }
44 
45 // Returns the number of columns of good width.
GoodColumnCount() const46 int ColPartitionSet::GoodColumnCount() const {
47   int num_good_cols = 0;
48   // This is a read-only iteration of the list.
49   ColPartition_IT it(const_cast<ColPartition_LIST *>(&parts_));
50   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
51     if (it.data()->good_width()) {
52       ++num_good_cols;
53     }
54   }
55   return num_good_cols;
56 }
57 
58 // Return an element of the parts_ list from its index.
GetColumnByIndex(int index)59 ColPartition *ColPartitionSet::GetColumnByIndex(int index) {
60   ColPartition_IT it(&parts_);
61   it.mark_cycle_pt();
62   for (int i = 0; i < index && !it.cycled_list(); ++i, it.forward()) {
63     ;
64   }
65   if (it.cycled_list()) {
66     return nullptr;
67   }
68   return it.data();
69 }
70 
71 // Return the ColPartition that contains the given coords, if any, else nullptr.
ColumnContaining(int x,int y)72 ColPartition *ColPartitionSet::ColumnContaining(int x, int y) {
73   ColPartition_IT it(&parts_);
74   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
75     ColPartition *part = it.data();
76     if (part->ColumnContains(x, y)) {
77       return part;
78     }
79   }
80   return nullptr;
81 }
82 
83 // Extract all the parts from the list, relinquishing ownership.
RelinquishParts()84 void ColPartitionSet::RelinquishParts() {
85   ColPartition_IT it(&parts_);
86   while (!it.empty()) {
87     it.extract();
88     it.forward();
89   }
90 }
91 
92 // Attempt to improve this by adding partitions or expanding partitions.
ImproveColumnCandidate(const WidthCallback & cb,PartSetVector * src_sets)93 void ColPartitionSet::ImproveColumnCandidate(const WidthCallback &cb,
94                                              PartSetVector *src_sets) {
95   int set_size = src_sets->size();
96   // Iterate over the provided column sets, as each one may have something
97   // to improve this.
98   for (int i = 0; i < set_size; ++i) {
99     ColPartitionSet *column_set = src_sets->at(i);
100     if (column_set == nullptr) {
101       continue;
102     }
103     // Iterate over the parts in this and column_set, adding bigger or
104     // new parts in column_set to this.
105     ColPartition_IT part_it(&parts_);
106     ASSERT_HOST(!part_it.empty());
107     int prev_right = INT32_MIN;
108     part_it.mark_cycle_pt();
109     ColPartition_IT col_it(&column_set->parts_);
110     for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
111       ColPartition *col_part = col_it.data();
112       if (col_part->blob_type() < BRT_UNKNOWN) {
113         continue; // Ignore image partitions.
114       }
115       int col_left = col_part->left_key();
116       int col_right = col_part->right_key();
117       // Sync-up part_it (in this) so it matches the col_part in column_set.
118       ColPartition *part = part_it.data();
119       while (!part_it.at_last() && part->right_key() < col_left) {
120         prev_right = part->right_key();
121         part_it.forward();
122         part = part_it.data();
123       }
124       int part_left = part->left_key();
125       int part_right = part->right_key();
126       if (part_right < col_left || col_right < part_left) {
127         // There is no overlap so this is a new partition.
128         AddPartition(col_part->ShallowCopy(), &part_it);
129         continue;
130       }
131       // Check the edges of col_part to see if they can improve part.
132       bool part_width_ok = cb(part->KeyWidth(part_left, part_right));
133       if (col_left < part_left && col_left > prev_right) {
134         // The left edge of the column is better and it doesn't overlap,
135         // so we can potentially expand it.
136         int col_box_left = col_part->BoxLeftKey();
137         bool tab_width_ok = cb(part->KeyWidth(col_left, part_right));
138         bool box_width_ok = cb(part->KeyWidth(col_box_left, part_right));
139         if (tab_width_ok || (!part_width_ok)) {
140           // The tab is leaving the good column metric at least as good as
141           // it was before, so use the tab.
142           part->CopyLeftTab(*col_part, false);
143           part->SetColumnGoodness(cb);
144         } else if (col_box_left < part_left &&
145                    (box_width_ok || !part_width_ok)) {
146           // The box is leaving the good column metric at least as good as
147           // it was before, so use the box.
148           part->CopyLeftTab(*col_part, true);
149           part->SetColumnGoodness(cb);
150         }
151         part_left = part->left_key();
152       }
153       if (col_right > part_right &&
154           (part_it.at_last() ||
155            part_it.data_relative(1)->left_key() > col_right)) {
156         // The right edge is better, so we can possibly expand it.
157         int col_box_right = col_part->BoxRightKey();
158         bool tab_width_ok = cb(part->KeyWidth(part_left, col_right));
159         bool box_width_ok = cb(part->KeyWidth(part_left, col_box_right));
160         if (tab_width_ok || (!part_width_ok)) {
161           // The tab is leaving the good column metric at least as good as
162           // it was before, so use the tab.
163           part->CopyRightTab(*col_part, false);
164           part->SetColumnGoodness(cb);
165         } else if (col_box_right > part_right &&
166                    (box_width_ok || !part_width_ok)) {
167           // The box is leaving the good column metric at least as good as
168           // it was before, so use the box.
169           part->CopyRightTab(*col_part, true);
170           part->SetColumnGoodness(cb);
171         }
172       }
173     }
174   }
175   ComputeCoverage();
176 }
177 
178 // If this set is good enough to represent a new partitioning into columns,
179 // add it to the vector of sets, otherwise delete it.
AddToColumnSetsIfUnique(PartSetVector * column_sets,const WidthCallback & cb)180 void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector *column_sets,
181                                               const WidthCallback &cb) {
182   bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
183                                          bounding_box_.bottom());
184   if (debug) {
185     tprintf("Considering new column candidate:\n");
186     Print();
187   }
188   if (!LegalColumnCandidate()) {
189     if (debug) {
190       tprintf("Not a legal column candidate:\n");
191       Print();
192     }
193     delete this;
194     return;
195   }
196   for (unsigned i = 0; i < column_sets->size(); ++i) {
197     ColPartitionSet *columns = column_sets->at(i);
198     // In ordering the column set candidates, good_coverage_ is king,
199     // followed by good_column_count_ and then bad_coverage_.
200     bool better = good_coverage_ > columns->good_coverage_;
201     if (good_coverage_ == columns->good_coverage_) {
202       better = good_column_count_ > columns->good_column_count_;
203       if (good_column_count_ == columns->good_column_count_) {
204         better = bad_coverage_ > columns->bad_coverage_;
205       }
206     }
207     if (better) {
208       // The new one is better so add it.
209       if (debug) {
210         tprintf("Good one\n");
211       }
212       column_sets->insert(column_sets->begin() + i, this);
213       return;
214     }
215     if (columns->CompatibleColumns(false, this, cb)) {
216       if (debug) {
217         tprintf("Duplicate\n");
218       }
219       delete this;
220       return; // It is not unique.
221     }
222   }
223   if (debug) {
224     tprintf("Added to end\n");
225   }
226   column_sets->push_back(this);
227 }
228 
229 // Return true if the partitions in other are all compatible with the columns
230 // in this.
CompatibleColumns(bool debug,ColPartitionSet * other,const WidthCallback & cb)231 bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet *other,
232                                         const WidthCallback &cb) {
233   if (debug) {
234     tprintf("CompatibleColumns testing compatibility\n");
235     Print();
236     other->Print();
237   }
238   if (other->parts_.empty()) {
239     if (debug) {
240       tprintf("CompatibleColumns true due to empty other\n");
241     }
242     return true;
243   }
244   ColPartition_IT it(&other->parts_);
245   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
246     ColPartition *part = it.data();
247     if (part->blob_type() < BRT_UNKNOWN) {
248       if (debug) {
249         tprintf("CompatibleColumns ignoring image partition\n");
250         part->Print();
251       }
252       continue; // Image partitions are irrelevant to column compatibility.
253     }
254     int y = part->MidY();
255     int left = part->bounding_box().left();
256     int right = part->bounding_box().right();
257     ColPartition *left_col = ColumnContaining(left, y);
258     ColPartition *right_col = ColumnContaining(right, y);
259     if (right_col == nullptr || left_col == nullptr) {
260       if (debug) {
261         tprintf("CompatibleColumns false due to partition edge outside\n");
262         part->Print();
263       }
264       return false; // A partition edge lies outside of all columns
265     }
266     if (right_col != left_col && cb(right - left)) {
267       if (debug) {
268         tprintf("CompatibleColumns false due to good width in multiple cols\n");
269         part->Print();
270       }
271       return false; // Partition with a good width must be in a single column.
272     }
273 
274     ColPartition_IT it2 = it;
275     while (!it2.at_last()) {
276       it2.forward();
277       ColPartition *next_part = it2.data();
278       if (!BLOBNBOX::IsTextType(next_part->blob_type())) {
279         continue; // Non-text partitions are irrelevant.
280       }
281       int next_left = next_part->bounding_box().left();
282       if (next_left == right) {
283         break; // They share the same edge, so one must be a pull-out.
284       }
285       // Search to see if right and next_left fall within a single column.
286       ColPartition *next_left_col = ColumnContaining(next_left, y);
287       if (right_col == next_left_col) {
288         // There is a column break in this column.
289         // This can be due to a figure caption within a column, a pull-out
290         // block, or a simple broken textline that remains to be merged:
291         // all allowed, or a change in column layout: not allowed.
292         // If both partitions are of good width, then it is likely
293         // a change in column layout, otherwise probably an allowed situation.
294         if (part->good_width() && next_part->good_width()) {
295           if (debug) {
296             int next_right = next_part->bounding_box().right();
297             tprintf("CompatibleColumns false due to 2 parts of good width\n");
298             tprintf("part1 %d-%d, part2 %d-%d\n", left, right, next_left,
299                     next_right);
300             right_col->Print();
301           }
302           return false;
303         }
304       }
305       break;
306     }
307   }
308   if (debug) {
309     tprintf("CompatibleColumns true!\n");
310   }
311   return true;
312 }
313 
314 // Returns the total width of all blobs in the part_set that do not lie
315 // within an approved column. Used as a cost measure for using this
316 // column set over another that might be compatible.
UnmatchedWidth(ColPartitionSet * part_set)317 int ColPartitionSet::UnmatchedWidth(ColPartitionSet *part_set) {
318   int total_width = 0;
319   ColPartition_IT it(&part_set->parts_);
320   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
321     ColPartition *part = it.data();
322     if (!BLOBNBOX::IsTextType(part->blob_type())) {
323       continue; // Non-text partitions are irrelevant to column compatibility.
324     }
325     int y = part->MidY();
326     BLOBNBOX_C_IT box_it(part->boxes());
327     for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
328       const TBOX &box = it.data()->bounding_box();
329       // Assume that the whole blob is outside any column iff its x-middle
330       // is outside.
331       int x = (box.left() + box.right()) / 2;
332       ColPartition *col = ColumnContaining(x, y);
333       if (col == nullptr) {
334         total_width += box.width();
335       }
336     }
337   }
338   return total_width;
339 }
340 
341 // Return true if this ColPartitionSet makes a legal column candidate by
342 // having legal individual partitions and non-overlapping adjacent pairs.
LegalColumnCandidate()343 bool ColPartitionSet::LegalColumnCandidate() {
344   ColPartition_IT it(&parts_);
345   if (it.empty()) {
346     return false;
347   }
348   bool any_text_parts = false;
349   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
350     ColPartition *part = it.data();
351     if (BLOBNBOX::IsTextType(part->blob_type())) {
352       if (!part->IsLegal()) {
353         return false; // Individual partition is illegal.
354       }
355       any_text_parts = true;
356     }
357     if (!it.at_last()) {
358       ColPartition *next_part = it.data_relative(1);
359       if (next_part->left_key() < part->right_key()) {
360         return false;
361       }
362     }
363   }
364   return any_text_parts;
365 }
366 
367 // Return a copy of this. If good_only will only copy the Good ColPartitions.
Copy(bool good_only)368 ColPartitionSet *ColPartitionSet::Copy(bool good_only) {
369   ColPartition_LIST copy_parts;
370   ColPartition_IT src_it(&parts_);
371   ColPartition_IT dest_it(&copy_parts);
372   for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
373     ColPartition *part = src_it.data();
374     if (BLOBNBOX::IsTextType(part->blob_type()) &&
375         (!good_only || part->good_width() || part->good_column())) {
376       dest_it.add_after_then_move(part->ShallowCopy());
377     }
378   }
379   if (dest_it.empty()) {
380     return nullptr;
381   }
382   return new ColPartitionSet(&copy_parts);
383 }
384 
385 // Return the bounding boxes of columns at the given y-range
GetColumnBoxes(int y_bottom,int y_top,ColSegment_LIST * segments)386 void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top,
387                                      ColSegment_LIST *segments) {
388   ColPartition_IT it(&parts_);
389   ColSegment_IT col_it(segments);
390   col_it.move_to_last();
391   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
392     ColPartition *part = it.data();
393     ICOORD bot_left(part->LeftAtY(y_top), y_bottom);
394     ICOORD top_right(part->RightAtY(y_bottom), y_top);
395     auto *col_seg = new ColSegment();
396     col_seg->InsertBox(TBOX(bot_left, top_right));
397     col_it.add_after_then_move(col_seg);
398   }
399 }
400 
401 #ifndef GRAPHICS_DISABLED
402 
403 // Display the edges of the columns at the given y coords.
DisplayColumnEdges(int y_bottom,int y_top,ScrollView * win)404 void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top,
405                                          ScrollView *win) {
406   ColPartition_IT it(&parts_);
407   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
408     ColPartition *part = it.data();
409     win->Line(part->LeftAtY(y_top), y_top, part->LeftAtY(y_bottom), y_bottom);
410     win->Line(part->RightAtY(y_top), y_top, part->RightAtY(y_bottom), y_bottom);
411   }
412 }
413 
414 #endif // !GRAPHICS_DISABLED
415 
416 // Return the ColumnSpanningType that best explains the columns overlapped
417 // by the given coords(left,right,y), with the given margins.
418 // Also return the first and last column index touched by the coords and
419 // the leftmost spanned column.
420 // Column indices are 2n + 1 for real columns (0 based) and even values
421 // represent the gaps in between columns, with 0 being left of the leftmost.
422 // resolution refers to the ppi resolution of the image.
SpanningType(int resolution,int left,int right,int height,int y,int left_margin,int right_margin,int * first_col,int * last_col,int * first_spanned_col)423 ColumnSpanningType ColPartitionSet::SpanningType(
424     int resolution, int left, int right, int height, int y, int left_margin,
425     int right_margin, int *first_col, int *last_col, int *first_spanned_col) {
426   *first_col = -1;
427   *last_col = -1;
428   *first_spanned_col = -1;
429   int margin_columns = 0;
430   ColPartition_IT it(&parts_);
431   int col_index = 1;
432   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) {
433     ColPartition *part = it.data();
434     if (part->ColumnContains(left, y) ||
435         (it.at_first() && part->ColumnContains(left + height, y))) {
436       // In the default case, first_col is set, but columns_spanned remains
437       // zero, so first_col will get reset in the first column genuinely
438       // spanned, but we can tell the difference from a noise partition
439       // that touches no column.
440       *first_col = col_index;
441       if (part->ColumnContains(right, y) ||
442           (it.at_last() && part->ColumnContains(right - height, y))) {
443         // Both within a single column.
444         *last_col = col_index;
445         return CST_FLOWING;
446       }
447       if (left_margin <= part->LeftAtY(y)) {
448         // It completely spans this column.
449         *first_spanned_col = col_index;
450         margin_columns = 1;
451       }
452     } else if (part->ColumnContains(right, y) ||
453                (it.at_last() && part->ColumnContains(right - height, y))) {
454       if (*first_col < 0) {
455         // It started in-between.
456         *first_col = col_index - 1;
457       }
458       if (right_margin >= part->RightAtY(y)) {
459         // It completely spans this column.
460         if (margin_columns == 0) {
461           *first_spanned_col = col_index;
462         }
463         ++margin_columns;
464       }
465       *last_col = col_index;
466       break;
467     } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) {
468       // Neither left nor right are contained within, so it spans this
469       // column.
470       if (*first_col < 0) {
471         // It started in between the previous column and the current column.
472         *first_col = col_index - 1;
473       }
474       if (margin_columns == 0) {
475         *first_spanned_col = col_index;
476       }
477       *last_col = col_index;
478     } else if (right < part->LeftAtY(y)) {
479       // We have gone past the end.
480       *last_col = col_index - 1;
481       if (*first_col < 0) {
482         // It must lie completely between columns =>noise.
483         *first_col = col_index - 1;
484       }
485       break;
486     }
487   }
488   if (*first_col < 0) {
489     *first_col = col_index - 1; // The last in-between.
490   }
491   if (*last_col < 0) {
492     *last_col = col_index - 1; // The last in-between.
493   }
494   ASSERT_HOST(*first_col >= 0 && *last_col >= 0);
495   ASSERT_HOST(*first_col <= *last_col);
496   if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) {
497     // Neither end was in a column, and it didn't span any, so it lies
498     // entirely between columns, therefore noise.
499     return CST_NOISE;
500   } else if (margin_columns <= 1) {
501     // An exception for headings that stick outside of single-column text.
502     if (margin_columns == 1 && parts_.singleton()) {
503       return CST_HEADING;
504     }
505     // It is a pullout, as left and right were not in the same column, but
506     // it doesn't go to the edge of its start and end.
507     return CST_PULLOUT;
508   }
509   // Its margins went to the edges of first and last columns => heading.
510   return CST_HEADING;
511 }
512 
513 // The column_set has changed. Close down all in-progress WorkingPartSets in
514 // columns that do not match and start new ones for the new columns in this.
515 // As ColPartitions are turned into BLOCKs, the used ones are put in
516 // used_parts, as they still need to be referenced in the grid.
ChangeWorkColumns(const ICOORD & bleft,const ICOORD & tright,int resolution,ColPartition_LIST * used_parts,WorkingPartSet_LIST * working_set_list)517 void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft,
518                                         const ICOORD &tright, int resolution,
519                                         ColPartition_LIST *used_parts,
520                                         WorkingPartSet_LIST *working_set_list) {
521   // Move the input list to a temporary location so we can delete its elements
522   // as we add them to the output working_set.
523   WorkingPartSet_LIST work_src;
524   WorkingPartSet_IT src_it(&work_src);
525   src_it.add_list_after(working_set_list);
526   src_it.move_to_first();
527   WorkingPartSet_IT dest_it(working_set_list);
528   // Completed blocks and to_blocks are accumulated and given to the first new
529   // one  whenever we keep a column, or at the end.
530   BLOCK_LIST completed_blocks;
531   TO_BLOCK_LIST to_blocks;
532   WorkingPartSet *first_new_set = nullptr;
533   WorkingPartSet *working_set = nullptr;
534   ColPartition_IT col_it(&parts_);
535   for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
536     ColPartition *column = col_it.data();
537     // Any existing column to the left of column is completed.
538     while (!src_it.empty() &&
539            ((working_set = src_it.data())->column() == nullptr ||
540             working_set->column()->right_key() <= column->left_key())) {
541       src_it.extract();
542       working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
543                                           &completed_blocks, &to_blocks);
544       delete working_set;
545       src_it.forward();
546     }
547     // Make a new between-column WorkingSet for before the current column.
548     working_set = new WorkingPartSet(nullptr);
549     dest_it.add_after_then_move(working_set);
550     if (first_new_set == nullptr) {
551       first_new_set = working_set;
552     }
553     // A matching column gets to stay, and first_new_set gets all the
554     // completed_sets.
555     working_set = src_it.empty() ? nullptr : src_it.data();
556     if (working_set != nullptr &&
557         working_set->column()->MatchingColumns(*column)) {
558       working_set->set_column(column);
559       dest_it.add_after_then_move(src_it.extract());
560       src_it.forward();
561       first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
562       first_new_set = nullptr;
563     } else {
564       // Just make a new working set for the current column.
565       working_set = new WorkingPartSet(column);
566       dest_it.add_after_then_move(working_set);
567     }
568   }
569   // Complete any remaining src working sets.
570   while (!src_it.empty()) {
571     working_set = src_it.extract();
572     working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
573                                         &completed_blocks, &to_blocks);
574     delete working_set;
575     src_it.forward();
576   }
577   // Make a new between-column WorkingSet for after the last column.
578   working_set = new WorkingPartSet(nullptr);
579   dest_it.add_after_then_move(working_set);
580   if (first_new_set == nullptr) {
581     first_new_set = working_set;
582   }
583   // The first_new_set now gets any accumulated completed_parts/blocks.
584   first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
585 }
586 
587 // Accumulate the widths and gaps into the given variables.
AccumulateColumnWidthsAndGaps(int * total_width,int * width_samples,int * total_gap,int * gap_samples)588 void ColPartitionSet::AccumulateColumnWidthsAndGaps(int *total_width,
589                                                     int *width_samples,
590                                                     int *total_gap,
591                                                     int *gap_samples) {
592   ColPartition_IT it(&parts_);
593   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
594     ColPartition *part = it.data();
595     *total_width += part->ColumnWidth();
596     ++*width_samples;
597     if (!it.at_last()) {
598       ColPartition *next_part = it.data_relative(1);
599       int part_left = part->right_key();
600       int part_right = next_part->left_key();
601       int gap = part->KeyWidth(part_left, part_right);
602       *total_gap += gap;
603       ++*gap_samples;
604     }
605   }
606 }
607 
608 // Provide debug output for this ColPartitionSet and all the ColPartitions.
Print()609 void ColPartitionSet::Print() {
610   ColPartition_IT it(&parts_);
611   tprintf(
612       "Partition set of %d parts, %d good, coverage=%d+%d"
613       " (%d,%d)->(%d,%d)\n",
614       it.length(), good_column_count_, good_coverage_, bad_coverage_,
615       bounding_box_.left(), bounding_box_.bottom(), bounding_box_.right(),
616       bounding_box_.top());
617   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
618     ColPartition *part = it.data();
619     part->Print();
620   }
621 }
622 
623 // PRIVATE CODE.
624 
625 // Add the given partition to the list in the appropriate place.
AddPartition(ColPartition * new_part,ColPartition_IT * it)626 void ColPartitionSet::AddPartition(ColPartition *new_part,
627                                    ColPartition_IT *it) {
628   AddPartitionCoverageAndBox(*new_part);
629   int new_right = new_part->right_key();
630   if (it->data()->left_key() >= new_right) {
631     it->add_before_stay_put(new_part);
632   } else {
633     it->add_after_stay_put(new_part);
634   }
635 }
636 
637 // Compute the coverage and good column count. Coverage is the amount of the
638 // width of the page (in pixels) that is covered by ColPartitions, which are
639 // used to provide candidate column layouts.
640 // Coverage is split into good and bad. Good coverage is provided by
641 // ColPartitions of a frequent width (according to the callback function
642 // provided by TabFinder::WidthCB, which accesses stored statistics on the
643 // widths of ColPartitions) and bad coverage is provided by all other
644 // ColPartitions, even if they have tab vectors at both sides. Thus:
645 // |-----------------------------------------------------------------|
646 // |        Double     width    heading                              |
647 // |-----------------------------------------------------------------|
648 // |-------------------------------| |-------------------------------|
649 // |   Common width ColParition    | |  Common width ColPartition    |
650 // |-------------------------------| |-------------------------------|
651 // the layout with two common-width columns has better coverage than the
652 // double width heading, because the coverage is "good," even though less in
653 // total coverage than the heading, because the heading coverage is "bad."
ComputeCoverage()654 void ColPartitionSet::ComputeCoverage() {
655   // Count the number of good columns and sum their width.
656   ColPartition_IT it(&parts_);
657   good_column_count_ = 0;
658   good_coverage_ = 0;
659   bad_coverage_ = 0;
660   bounding_box_ = TBOX();
661   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
662     ColPartition *part = it.data();
663     AddPartitionCoverageAndBox(*part);
664   }
665 }
666 
667 // Adds the coverage, column count and box for a single partition,
668 // without adding it to the list. (Helper factored from ComputeCoverage.)
AddPartitionCoverageAndBox(const ColPartition & part)669 void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition &part) {
670   bounding_box_ += part.bounding_box();
671   int coverage = part.ColumnWidth();
672   if (part.good_width()) {
673     good_coverage_ += coverage;
674     good_column_count_ += 2;
675   } else {
676     if (part.blob_type() < BRT_UNKNOWN) {
677       coverage /= 2;
678     }
679     if (part.good_column()) {
680       ++good_column_count_;
681     }
682     bad_coverage_ += coverage;
683   }
684 }
685 
686 } // namespace tesseract.
687