1 ///////////////////////////////////////////////////////////////////////
2 // File: colpartitionset.cpp
3 // Description: Class to hold a list of ColPartitions of the page that
4 // correspond roughly to columns.
5 // Author: Ray Smith
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19
20 #ifdef HAVE_CONFIG_H
21 # include "config_auto.h"
22 #endif
23
24 #include "colpartitionset.h"
25 #include "tablefind.h"
26 #include "workingpartset.h"
27
28 namespace tesseract {
29
30 // Minimum width of a column to be interesting as a multiple of resolution.
31 const double kMinColumnWidth = 2.0 / 3;
32
ColPartitionSet(ColPartition_LIST * partitions)33 ColPartitionSet::ColPartitionSet(ColPartition_LIST *partitions) {
34 ColPartition_IT it(&parts_);
35 it.add_list_after(partitions);
36 ComputeCoverage();
37 }
38
ColPartitionSet(ColPartition * part)39 ColPartitionSet::ColPartitionSet(ColPartition *part) {
40 ColPartition_IT it(&parts_);
41 it.add_after_then_move(part);
42 ComputeCoverage();
43 }
44
45 // Returns the number of columns of good width.
GoodColumnCount() const46 int ColPartitionSet::GoodColumnCount() const {
47 int num_good_cols = 0;
48 // This is a read-only iteration of the list.
49 ColPartition_IT it(const_cast<ColPartition_LIST *>(&parts_));
50 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
51 if (it.data()->good_width()) {
52 ++num_good_cols;
53 }
54 }
55 return num_good_cols;
56 }
57
58 // Return an element of the parts_ list from its index.
GetColumnByIndex(int index)59 ColPartition *ColPartitionSet::GetColumnByIndex(int index) {
60 ColPartition_IT it(&parts_);
61 it.mark_cycle_pt();
62 for (int i = 0; i < index && !it.cycled_list(); ++i, it.forward()) {
63 ;
64 }
65 if (it.cycled_list()) {
66 return nullptr;
67 }
68 return it.data();
69 }
70
71 // Return the ColPartition that contains the given coords, if any, else nullptr.
ColumnContaining(int x,int y)72 ColPartition *ColPartitionSet::ColumnContaining(int x, int y) {
73 ColPartition_IT it(&parts_);
74 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
75 ColPartition *part = it.data();
76 if (part->ColumnContains(x, y)) {
77 return part;
78 }
79 }
80 return nullptr;
81 }
82
83 // Extract all the parts from the list, relinquishing ownership.
RelinquishParts()84 void ColPartitionSet::RelinquishParts() {
85 ColPartition_IT it(&parts_);
86 while (!it.empty()) {
87 it.extract();
88 it.forward();
89 }
90 }
91
92 // Attempt to improve this by adding partitions or expanding partitions.
ImproveColumnCandidate(const WidthCallback & cb,PartSetVector * src_sets)93 void ColPartitionSet::ImproveColumnCandidate(const WidthCallback &cb,
94 PartSetVector *src_sets) {
95 int set_size = src_sets->size();
96 // Iterate over the provided column sets, as each one may have something
97 // to improve this.
98 for (int i = 0; i < set_size; ++i) {
99 ColPartitionSet *column_set = src_sets->at(i);
100 if (column_set == nullptr) {
101 continue;
102 }
103 // Iterate over the parts in this and column_set, adding bigger or
104 // new parts in column_set to this.
105 ColPartition_IT part_it(&parts_);
106 ASSERT_HOST(!part_it.empty());
107 int prev_right = INT32_MIN;
108 part_it.mark_cycle_pt();
109 ColPartition_IT col_it(&column_set->parts_);
110 for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
111 ColPartition *col_part = col_it.data();
112 if (col_part->blob_type() < BRT_UNKNOWN) {
113 continue; // Ignore image partitions.
114 }
115 int col_left = col_part->left_key();
116 int col_right = col_part->right_key();
117 // Sync-up part_it (in this) so it matches the col_part in column_set.
118 ColPartition *part = part_it.data();
119 while (!part_it.at_last() && part->right_key() < col_left) {
120 prev_right = part->right_key();
121 part_it.forward();
122 part = part_it.data();
123 }
124 int part_left = part->left_key();
125 int part_right = part->right_key();
126 if (part_right < col_left || col_right < part_left) {
127 // There is no overlap so this is a new partition.
128 AddPartition(col_part->ShallowCopy(), &part_it);
129 continue;
130 }
131 // Check the edges of col_part to see if they can improve part.
132 bool part_width_ok = cb(part->KeyWidth(part_left, part_right));
133 if (col_left < part_left && col_left > prev_right) {
134 // The left edge of the column is better and it doesn't overlap,
135 // so we can potentially expand it.
136 int col_box_left = col_part->BoxLeftKey();
137 bool tab_width_ok = cb(part->KeyWidth(col_left, part_right));
138 bool box_width_ok = cb(part->KeyWidth(col_box_left, part_right));
139 if (tab_width_ok || (!part_width_ok)) {
140 // The tab is leaving the good column metric at least as good as
141 // it was before, so use the tab.
142 part->CopyLeftTab(*col_part, false);
143 part->SetColumnGoodness(cb);
144 } else if (col_box_left < part_left &&
145 (box_width_ok || !part_width_ok)) {
146 // The box is leaving the good column metric at least as good as
147 // it was before, so use the box.
148 part->CopyLeftTab(*col_part, true);
149 part->SetColumnGoodness(cb);
150 }
151 part_left = part->left_key();
152 }
153 if (col_right > part_right &&
154 (part_it.at_last() ||
155 part_it.data_relative(1)->left_key() > col_right)) {
156 // The right edge is better, so we can possibly expand it.
157 int col_box_right = col_part->BoxRightKey();
158 bool tab_width_ok = cb(part->KeyWidth(part_left, col_right));
159 bool box_width_ok = cb(part->KeyWidth(part_left, col_box_right));
160 if (tab_width_ok || (!part_width_ok)) {
161 // The tab is leaving the good column metric at least as good as
162 // it was before, so use the tab.
163 part->CopyRightTab(*col_part, false);
164 part->SetColumnGoodness(cb);
165 } else if (col_box_right > part_right &&
166 (box_width_ok || !part_width_ok)) {
167 // The box is leaving the good column metric at least as good as
168 // it was before, so use the box.
169 part->CopyRightTab(*col_part, true);
170 part->SetColumnGoodness(cb);
171 }
172 }
173 }
174 }
175 ComputeCoverage();
176 }
177
178 // If this set is good enough to represent a new partitioning into columns,
179 // add it to the vector of sets, otherwise delete it.
AddToColumnSetsIfUnique(PartSetVector * column_sets,const WidthCallback & cb)180 void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector *column_sets,
181 const WidthCallback &cb) {
182 bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
183 bounding_box_.bottom());
184 if (debug) {
185 tprintf("Considering new column candidate:\n");
186 Print();
187 }
188 if (!LegalColumnCandidate()) {
189 if (debug) {
190 tprintf("Not a legal column candidate:\n");
191 Print();
192 }
193 delete this;
194 return;
195 }
196 for (unsigned i = 0; i < column_sets->size(); ++i) {
197 ColPartitionSet *columns = column_sets->at(i);
198 // In ordering the column set candidates, good_coverage_ is king,
199 // followed by good_column_count_ and then bad_coverage_.
200 bool better = good_coverage_ > columns->good_coverage_;
201 if (good_coverage_ == columns->good_coverage_) {
202 better = good_column_count_ > columns->good_column_count_;
203 if (good_column_count_ == columns->good_column_count_) {
204 better = bad_coverage_ > columns->bad_coverage_;
205 }
206 }
207 if (better) {
208 // The new one is better so add it.
209 if (debug) {
210 tprintf("Good one\n");
211 }
212 column_sets->insert(column_sets->begin() + i, this);
213 return;
214 }
215 if (columns->CompatibleColumns(false, this, cb)) {
216 if (debug) {
217 tprintf("Duplicate\n");
218 }
219 delete this;
220 return; // It is not unique.
221 }
222 }
223 if (debug) {
224 tprintf("Added to end\n");
225 }
226 column_sets->push_back(this);
227 }
228
229 // Return true if the partitions in other are all compatible with the columns
230 // in this.
CompatibleColumns(bool debug,ColPartitionSet * other,const WidthCallback & cb)231 bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet *other,
232 const WidthCallback &cb) {
233 if (debug) {
234 tprintf("CompatibleColumns testing compatibility\n");
235 Print();
236 other->Print();
237 }
238 if (other->parts_.empty()) {
239 if (debug) {
240 tprintf("CompatibleColumns true due to empty other\n");
241 }
242 return true;
243 }
244 ColPartition_IT it(&other->parts_);
245 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
246 ColPartition *part = it.data();
247 if (part->blob_type() < BRT_UNKNOWN) {
248 if (debug) {
249 tprintf("CompatibleColumns ignoring image partition\n");
250 part->Print();
251 }
252 continue; // Image partitions are irrelevant to column compatibility.
253 }
254 int y = part->MidY();
255 int left = part->bounding_box().left();
256 int right = part->bounding_box().right();
257 ColPartition *left_col = ColumnContaining(left, y);
258 ColPartition *right_col = ColumnContaining(right, y);
259 if (right_col == nullptr || left_col == nullptr) {
260 if (debug) {
261 tprintf("CompatibleColumns false due to partition edge outside\n");
262 part->Print();
263 }
264 return false; // A partition edge lies outside of all columns
265 }
266 if (right_col != left_col && cb(right - left)) {
267 if (debug) {
268 tprintf("CompatibleColumns false due to good width in multiple cols\n");
269 part->Print();
270 }
271 return false; // Partition with a good width must be in a single column.
272 }
273
274 ColPartition_IT it2 = it;
275 while (!it2.at_last()) {
276 it2.forward();
277 ColPartition *next_part = it2.data();
278 if (!BLOBNBOX::IsTextType(next_part->blob_type())) {
279 continue; // Non-text partitions are irrelevant.
280 }
281 int next_left = next_part->bounding_box().left();
282 if (next_left == right) {
283 break; // They share the same edge, so one must be a pull-out.
284 }
285 // Search to see if right and next_left fall within a single column.
286 ColPartition *next_left_col = ColumnContaining(next_left, y);
287 if (right_col == next_left_col) {
288 // There is a column break in this column.
289 // This can be due to a figure caption within a column, a pull-out
290 // block, or a simple broken textline that remains to be merged:
291 // all allowed, or a change in column layout: not allowed.
292 // If both partitions are of good width, then it is likely
293 // a change in column layout, otherwise probably an allowed situation.
294 if (part->good_width() && next_part->good_width()) {
295 if (debug) {
296 int next_right = next_part->bounding_box().right();
297 tprintf("CompatibleColumns false due to 2 parts of good width\n");
298 tprintf("part1 %d-%d, part2 %d-%d\n", left, right, next_left,
299 next_right);
300 right_col->Print();
301 }
302 return false;
303 }
304 }
305 break;
306 }
307 }
308 if (debug) {
309 tprintf("CompatibleColumns true!\n");
310 }
311 return true;
312 }
313
314 // Returns the total width of all blobs in the part_set that do not lie
315 // within an approved column. Used as a cost measure for using this
316 // column set over another that might be compatible.
UnmatchedWidth(ColPartitionSet * part_set)317 int ColPartitionSet::UnmatchedWidth(ColPartitionSet *part_set) {
318 int total_width = 0;
319 ColPartition_IT it(&part_set->parts_);
320 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
321 ColPartition *part = it.data();
322 if (!BLOBNBOX::IsTextType(part->blob_type())) {
323 continue; // Non-text partitions are irrelevant to column compatibility.
324 }
325 int y = part->MidY();
326 BLOBNBOX_C_IT box_it(part->boxes());
327 for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
328 const TBOX &box = it.data()->bounding_box();
329 // Assume that the whole blob is outside any column iff its x-middle
330 // is outside.
331 int x = (box.left() + box.right()) / 2;
332 ColPartition *col = ColumnContaining(x, y);
333 if (col == nullptr) {
334 total_width += box.width();
335 }
336 }
337 }
338 return total_width;
339 }
340
341 // Return true if this ColPartitionSet makes a legal column candidate by
342 // having legal individual partitions and non-overlapping adjacent pairs.
LegalColumnCandidate()343 bool ColPartitionSet::LegalColumnCandidate() {
344 ColPartition_IT it(&parts_);
345 if (it.empty()) {
346 return false;
347 }
348 bool any_text_parts = false;
349 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
350 ColPartition *part = it.data();
351 if (BLOBNBOX::IsTextType(part->blob_type())) {
352 if (!part->IsLegal()) {
353 return false; // Individual partition is illegal.
354 }
355 any_text_parts = true;
356 }
357 if (!it.at_last()) {
358 ColPartition *next_part = it.data_relative(1);
359 if (next_part->left_key() < part->right_key()) {
360 return false;
361 }
362 }
363 }
364 return any_text_parts;
365 }
366
367 // Return a copy of this. If good_only will only copy the Good ColPartitions.
Copy(bool good_only)368 ColPartitionSet *ColPartitionSet::Copy(bool good_only) {
369 ColPartition_LIST copy_parts;
370 ColPartition_IT src_it(&parts_);
371 ColPartition_IT dest_it(©_parts);
372 for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
373 ColPartition *part = src_it.data();
374 if (BLOBNBOX::IsTextType(part->blob_type()) &&
375 (!good_only || part->good_width() || part->good_column())) {
376 dest_it.add_after_then_move(part->ShallowCopy());
377 }
378 }
379 if (dest_it.empty()) {
380 return nullptr;
381 }
382 return new ColPartitionSet(©_parts);
383 }
384
385 // Return the bounding boxes of columns at the given y-range
GetColumnBoxes(int y_bottom,int y_top,ColSegment_LIST * segments)386 void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top,
387 ColSegment_LIST *segments) {
388 ColPartition_IT it(&parts_);
389 ColSegment_IT col_it(segments);
390 col_it.move_to_last();
391 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
392 ColPartition *part = it.data();
393 ICOORD bot_left(part->LeftAtY(y_top), y_bottom);
394 ICOORD top_right(part->RightAtY(y_bottom), y_top);
395 auto *col_seg = new ColSegment();
396 col_seg->InsertBox(TBOX(bot_left, top_right));
397 col_it.add_after_then_move(col_seg);
398 }
399 }
400
401 #ifndef GRAPHICS_DISABLED
402
403 // Display the edges of the columns at the given y coords.
DisplayColumnEdges(int y_bottom,int y_top,ScrollView * win)404 void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top,
405 ScrollView *win) {
406 ColPartition_IT it(&parts_);
407 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
408 ColPartition *part = it.data();
409 win->Line(part->LeftAtY(y_top), y_top, part->LeftAtY(y_bottom), y_bottom);
410 win->Line(part->RightAtY(y_top), y_top, part->RightAtY(y_bottom), y_bottom);
411 }
412 }
413
414 #endif // !GRAPHICS_DISABLED
415
416 // Return the ColumnSpanningType that best explains the columns overlapped
417 // by the given coords(left,right,y), with the given margins.
418 // Also return the first and last column index touched by the coords and
419 // the leftmost spanned column.
420 // Column indices are 2n + 1 for real columns (0 based) and even values
421 // represent the gaps in between columns, with 0 being left of the leftmost.
422 // resolution refers to the ppi resolution of the image.
SpanningType(int resolution,int left,int right,int height,int y,int left_margin,int right_margin,int * first_col,int * last_col,int * first_spanned_col)423 ColumnSpanningType ColPartitionSet::SpanningType(
424 int resolution, int left, int right, int height, int y, int left_margin,
425 int right_margin, int *first_col, int *last_col, int *first_spanned_col) {
426 *first_col = -1;
427 *last_col = -1;
428 *first_spanned_col = -1;
429 int margin_columns = 0;
430 ColPartition_IT it(&parts_);
431 int col_index = 1;
432 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) {
433 ColPartition *part = it.data();
434 if (part->ColumnContains(left, y) ||
435 (it.at_first() && part->ColumnContains(left + height, y))) {
436 // In the default case, first_col is set, but columns_spanned remains
437 // zero, so first_col will get reset in the first column genuinely
438 // spanned, but we can tell the difference from a noise partition
439 // that touches no column.
440 *first_col = col_index;
441 if (part->ColumnContains(right, y) ||
442 (it.at_last() && part->ColumnContains(right - height, y))) {
443 // Both within a single column.
444 *last_col = col_index;
445 return CST_FLOWING;
446 }
447 if (left_margin <= part->LeftAtY(y)) {
448 // It completely spans this column.
449 *first_spanned_col = col_index;
450 margin_columns = 1;
451 }
452 } else if (part->ColumnContains(right, y) ||
453 (it.at_last() && part->ColumnContains(right - height, y))) {
454 if (*first_col < 0) {
455 // It started in-between.
456 *first_col = col_index - 1;
457 }
458 if (right_margin >= part->RightAtY(y)) {
459 // It completely spans this column.
460 if (margin_columns == 0) {
461 *first_spanned_col = col_index;
462 }
463 ++margin_columns;
464 }
465 *last_col = col_index;
466 break;
467 } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) {
468 // Neither left nor right are contained within, so it spans this
469 // column.
470 if (*first_col < 0) {
471 // It started in between the previous column and the current column.
472 *first_col = col_index - 1;
473 }
474 if (margin_columns == 0) {
475 *first_spanned_col = col_index;
476 }
477 *last_col = col_index;
478 } else if (right < part->LeftAtY(y)) {
479 // We have gone past the end.
480 *last_col = col_index - 1;
481 if (*first_col < 0) {
482 // It must lie completely between columns =>noise.
483 *first_col = col_index - 1;
484 }
485 break;
486 }
487 }
488 if (*first_col < 0) {
489 *first_col = col_index - 1; // The last in-between.
490 }
491 if (*last_col < 0) {
492 *last_col = col_index - 1; // The last in-between.
493 }
494 ASSERT_HOST(*first_col >= 0 && *last_col >= 0);
495 ASSERT_HOST(*first_col <= *last_col);
496 if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) {
497 // Neither end was in a column, and it didn't span any, so it lies
498 // entirely between columns, therefore noise.
499 return CST_NOISE;
500 } else if (margin_columns <= 1) {
501 // An exception for headings that stick outside of single-column text.
502 if (margin_columns == 1 && parts_.singleton()) {
503 return CST_HEADING;
504 }
505 // It is a pullout, as left and right were not in the same column, but
506 // it doesn't go to the edge of its start and end.
507 return CST_PULLOUT;
508 }
509 // Its margins went to the edges of first and last columns => heading.
510 return CST_HEADING;
511 }
512
513 // The column_set has changed. Close down all in-progress WorkingPartSets in
514 // columns that do not match and start new ones for the new columns in this.
515 // As ColPartitions are turned into BLOCKs, the used ones are put in
516 // used_parts, as they still need to be referenced in the grid.
ChangeWorkColumns(const ICOORD & bleft,const ICOORD & tright,int resolution,ColPartition_LIST * used_parts,WorkingPartSet_LIST * working_set_list)517 void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft,
518 const ICOORD &tright, int resolution,
519 ColPartition_LIST *used_parts,
520 WorkingPartSet_LIST *working_set_list) {
521 // Move the input list to a temporary location so we can delete its elements
522 // as we add them to the output working_set.
523 WorkingPartSet_LIST work_src;
524 WorkingPartSet_IT src_it(&work_src);
525 src_it.add_list_after(working_set_list);
526 src_it.move_to_first();
527 WorkingPartSet_IT dest_it(working_set_list);
528 // Completed blocks and to_blocks are accumulated and given to the first new
529 // one whenever we keep a column, or at the end.
530 BLOCK_LIST completed_blocks;
531 TO_BLOCK_LIST to_blocks;
532 WorkingPartSet *first_new_set = nullptr;
533 WorkingPartSet *working_set = nullptr;
534 ColPartition_IT col_it(&parts_);
535 for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
536 ColPartition *column = col_it.data();
537 // Any existing column to the left of column is completed.
538 while (!src_it.empty() &&
539 ((working_set = src_it.data())->column() == nullptr ||
540 working_set->column()->right_key() <= column->left_key())) {
541 src_it.extract();
542 working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
543 &completed_blocks, &to_blocks);
544 delete working_set;
545 src_it.forward();
546 }
547 // Make a new between-column WorkingSet for before the current column.
548 working_set = new WorkingPartSet(nullptr);
549 dest_it.add_after_then_move(working_set);
550 if (first_new_set == nullptr) {
551 first_new_set = working_set;
552 }
553 // A matching column gets to stay, and first_new_set gets all the
554 // completed_sets.
555 working_set = src_it.empty() ? nullptr : src_it.data();
556 if (working_set != nullptr &&
557 working_set->column()->MatchingColumns(*column)) {
558 working_set->set_column(column);
559 dest_it.add_after_then_move(src_it.extract());
560 src_it.forward();
561 first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
562 first_new_set = nullptr;
563 } else {
564 // Just make a new working set for the current column.
565 working_set = new WorkingPartSet(column);
566 dest_it.add_after_then_move(working_set);
567 }
568 }
569 // Complete any remaining src working sets.
570 while (!src_it.empty()) {
571 working_set = src_it.extract();
572 working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
573 &completed_blocks, &to_blocks);
574 delete working_set;
575 src_it.forward();
576 }
577 // Make a new between-column WorkingSet for after the last column.
578 working_set = new WorkingPartSet(nullptr);
579 dest_it.add_after_then_move(working_set);
580 if (first_new_set == nullptr) {
581 first_new_set = working_set;
582 }
583 // The first_new_set now gets any accumulated completed_parts/blocks.
584 first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
585 }
586
587 // Accumulate the widths and gaps into the given variables.
AccumulateColumnWidthsAndGaps(int * total_width,int * width_samples,int * total_gap,int * gap_samples)588 void ColPartitionSet::AccumulateColumnWidthsAndGaps(int *total_width,
589 int *width_samples,
590 int *total_gap,
591 int *gap_samples) {
592 ColPartition_IT it(&parts_);
593 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
594 ColPartition *part = it.data();
595 *total_width += part->ColumnWidth();
596 ++*width_samples;
597 if (!it.at_last()) {
598 ColPartition *next_part = it.data_relative(1);
599 int part_left = part->right_key();
600 int part_right = next_part->left_key();
601 int gap = part->KeyWidth(part_left, part_right);
602 *total_gap += gap;
603 ++*gap_samples;
604 }
605 }
606 }
607
608 // Provide debug output for this ColPartitionSet and all the ColPartitions.
Print()609 void ColPartitionSet::Print() {
610 ColPartition_IT it(&parts_);
611 tprintf(
612 "Partition set of %d parts, %d good, coverage=%d+%d"
613 " (%d,%d)->(%d,%d)\n",
614 it.length(), good_column_count_, good_coverage_, bad_coverage_,
615 bounding_box_.left(), bounding_box_.bottom(), bounding_box_.right(),
616 bounding_box_.top());
617 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
618 ColPartition *part = it.data();
619 part->Print();
620 }
621 }
622
623 // PRIVATE CODE.
624
625 // Add the given partition to the list in the appropriate place.
AddPartition(ColPartition * new_part,ColPartition_IT * it)626 void ColPartitionSet::AddPartition(ColPartition *new_part,
627 ColPartition_IT *it) {
628 AddPartitionCoverageAndBox(*new_part);
629 int new_right = new_part->right_key();
630 if (it->data()->left_key() >= new_right) {
631 it->add_before_stay_put(new_part);
632 } else {
633 it->add_after_stay_put(new_part);
634 }
635 }
636
637 // Compute the coverage and good column count. Coverage is the amount of the
638 // width of the page (in pixels) that is covered by ColPartitions, which are
639 // used to provide candidate column layouts.
640 // Coverage is split into good and bad. Good coverage is provided by
641 // ColPartitions of a frequent width (according to the callback function
642 // provided by TabFinder::WidthCB, which accesses stored statistics on the
643 // widths of ColPartitions) and bad coverage is provided by all other
644 // ColPartitions, even if they have tab vectors at both sides. Thus:
645 // |-----------------------------------------------------------------|
646 // | Double width heading |
647 // |-----------------------------------------------------------------|
648 // |-------------------------------| |-------------------------------|
649 // | Common width ColParition | | Common width ColPartition |
650 // |-------------------------------| |-------------------------------|
651 // the layout with two common-width columns has better coverage than the
652 // double width heading, because the coverage is "good," even though less in
653 // total coverage than the heading, because the heading coverage is "bad."
ComputeCoverage()654 void ColPartitionSet::ComputeCoverage() {
655 // Count the number of good columns and sum their width.
656 ColPartition_IT it(&parts_);
657 good_column_count_ = 0;
658 good_coverage_ = 0;
659 bad_coverage_ = 0;
660 bounding_box_ = TBOX();
661 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
662 ColPartition *part = it.data();
663 AddPartitionCoverageAndBox(*part);
664 }
665 }
666
667 // Adds the coverage, column count and box for a single partition,
668 // without adding it to the list. (Helper factored from ComputeCoverage.)
AddPartitionCoverageAndBox(const ColPartition & part)669 void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition &part) {
670 bounding_box_ += part.bounding_box();
671 int coverage = part.ColumnWidth();
672 if (part.good_width()) {
673 good_coverage_ += coverage;
674 good_column_count_ += 2;
675 } else {
676 if (part.blob_type() < BRT_UNKNOWN) {
677 coverage /= 2;
678 }
679 if (part.good_column()) {
680 ++good_column_count_;
681 }
682 bad_coverage_ += coverage;
683 }
684 }
685
686 } // namespace tesseract.
687