1 ///////////////////////////////////////////////////////////////////////
2 // File:        colpartition.h
3 // Description: Class to hold partitions of the page that correspond
4 //              roughly to text lines.
5 // Author:      Ray Smith
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19 
20 #ifndef TESSERACT_TEXTORD_COLPARTITION_H_
21 #define TESSERACT_TEXTORD_COLPARTITION_H_
22 
23 #include "bbgrid.h"
24 #include "blobbox.h" // For BlobRegionType.
25 #include "ocrblock.h"
26 #include "rect.h" // For TBOX.
27 #include "scrollview.h"
28 #include "tabfind.h"   // For WidthCallback.
29 #include "tabvector.h" // For BLOBNBOX_CLIST.
30 
31 #include <algorithm>
32 
33 namespace tesseract {
34 
35 // Number of colors in the color1, color2 arrays.
36 const int kRGBRMSColors = 4;
37 
38 class ColPartition;
39 class ColPartitionSet;
40 class ColPartitionGrid;
41 class WorkingPartSet;
42 class WorkingPartSet_LIST;
43 
44 // An enum to indicate how a partition sits on the columns.
45 // The order of flowing/heading/pullout must be kept consistent with
46 // PolyBlockType.
47 enum ColumnSpanningType {
48   CST_NOISE,   // Strictly between columns.
49   CST_FLOWING, // Strictly within a single column.
50   CST_HEADING, // Spans multiple columns.
51   CST_PULLOUT, // Touches multiple columns, but doesn't span them.
52   CST_COUNT    // Number of entries.
53 };
54 
55 ELIST2IZEH(ColPartition)
CLISTIZEH(ColPartition)56 CLISTIZEH(ColPartition)
57 
58 /**
59  * ColPartition is a partition of a horizontal slice of the page.
60  * It starts out as a collection of blobs at a particular y-coord in the grid,
61  * but ends up (after merging and uniquing) as an approximate text line.
62  * ColPartitions are also used to hold a partitioning of the page into
63  * columns, each representing one column. Although a ColPartition applies
64  * to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions
65  * emerges, which represents the columns over a wide y-coordinate range.
66  */
67 class TESS_API ColPartition : public ELIST2_LINK {
68 public:
69   // This empty constructor is here only so that the class can be ELISTIZED.
70   // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
71   // and eliminate CLASSNAME##_copier.
72   ColPartition() = default;
73 
74   /**
75    * @param blob_type is the blob_region_type_ of the blobs in this partition.
76    * @param vertical is the direction of logical vertical on the possibly skewed
77    * image.
78    */
79   ColPartition(BlobRegionType blob_type, const ICOORD &vertical);
80   /**
81    * Constructs a fake ColPartition with no BLOBNBOXes to represent a
82    * horizontal or vertical line, given a type and a bounding box.
83    */
84   static ColPartition *MakeLinePartition(BlobRegionType blob_type,
85                                          const ICOORD &vertical, int left,
86                                          int bottom, int right, int top);
87 
88   // Constructs and returns a fake ColPartition with a single fake BLOBNBOX,
89   // all made from a single TBOX.
90   // WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and
91   // the ColPartition owns the BLOBNBOX!!!
92   // Call DeleteBoxes before deleting the ColPartition.
93   static ColPartition *FakePartition(const TBOX &box, PolyBlockType block_type,
94                                      BlobRegionType blob_type,
95                                      BlobTextFlowType flow);
96 
97   // Constructs and returns a ColPartition with the given real BLOBNBOX,
98   // and sets it up to be a "big" partition (single-blob partition bigger
99   // than the surrounding text that may be a dropcap, two or more vertically
100   // touching characters, or some graphic element.
101   // If the given list is not nullptr, the partition is also added to the list.
102   static ColPartition *MakeBigPartition(BLOBNBOX *box,
103                                         ColPartition_LIST *big_part_list);
104 
105   ~ColPartition();
106 
107   // Simple accessors.
108   const TBOX &bounding_box() const {
109     return bounding_box_;
110   }
111   int left_margin() const {
112     return left_margin_;
113   }
114   void set_left_margin(int margin) {
115     left_margin_ = margin;
116   }
117   int right_margin() const {
118     return right_margin_;
119   }
120   void set_right_margin(int margin) {
121     right_margin_ = margin;
122   }
123   int median_top() const {
124     return median_top_;
125   }
126   int median_bottom() const {
127     return median_bottom_;
128   }
129   int median_left() const {
130     return median_left_;
131   }
132   int median_right() const {
133     return median_right_;
134   }
135   int median_height() const {
136     return median_height_;
137   }
138   void set_median_height(int height) {
139     median_height_ = height;
140   }
141   int median_width() const {
142     return median_width_;
143   }
144   void set_median_width(int width) {
145     median_width_ = width;
146   }
147   BlobRegionType blob_type() const {
148     return blob_type_;
149   }
150   void set_blob_type(BlobRegionType t) {
151     blob_type_ = t;
152   }
153   BlobTextFlowType flow() const {
154     return flow_;
155   }
156   void set_flow(BlobTextFlowType f) {
157     flow_ = f;
158   }
159   int good_blob_score() const {
160     return good_blob_score_;
161   }
162   bool good_width() const {
163     return good_width_;
164   }
165   bool good_column() const {
166     return good_column_;
167   }
168   bool left_key_tab() const {
169     return left_key_tab_;
170   }
171   int left_key() const {
172     return left_key_;
173   }
174   bool right_key_tab() const {
175     return right_key_tab_;
176   }
177   int right_key() const {
178     return right_key_;
179   }
180   PolyBlockType type() const {
181     return type_;
182   }
183   void set_type(PolyBlockType t) {
184     type_ = t;
185   }
186   BLOBNBOX_CLIST *boxes() {
187     return &boxes_;
188   }
189   int boxes_count() const {
190     return boxes_.length();
191   }
192   void set_vertical(const ICOORD &v) {
193     vertical_ = v;
194   }
195   ColPartition_CLIST *upper_partners() {
196     return &upper_partners_;
197   }
198   ColPartition_CLIST *lower_partners() {
199     return &lower_partners_;
200   }
201   void set_working_set(WorkingPartSet *working_set) {
202     working_set_ = working_set;
203   }
204   bool block_owned() const {
205     return block_owned_;
206   }
207   void set_block_owned(bool owned) {
208     block_owned_ = owned;
209   }
210   bool desperately_merged() const {
211     return desperately_merged_;
212   }
213   ColPartitionSet *column_set() const {
214     return column_set_;
215   }
216   void set_side_step(int step) {
217     side_step_ = step;
218   }
219   int bottom_spacing() const {
220     return bottom_spacing_;
221   }
222   void set_bottom_spacing(int spacing) {
223     bottom_spacing_ = spacing;
224   }
225   int top_spacing() const {
226     return top_spacing_;
227   }
228   void set_top_spacing(int spacing) {
229     top_spacing_ = spacing;
230   }
231 
232   void set_table_type() {
233     if (type_ != PT_TABLE) {
234       type_before_table_ = type_;
235       type_ = PT_TABLE;
236     }
237   }
238   void clear_table_type() {
239     if (type_ == PT_TABLE) {
240       type_ = type_before_table_;
241     }
242   }
243   bool inside_table_column() {
244     return inside_table_column_;
245   }
246   void set_inside_table_column(bool val) {
247     inside_table_column_ = val;
248   }
249   ColPartition *nearest_neighbor_above() const {
250     return nearest_neighbor_above_;
251   }
252   void set_nearest_neighbor_above(ColPartition *part) {
253     nearest_neighbor_above_ = part;
254   }
255   ColPartition *nearest_neighbor_below() const {
256     return nearest_neighbor_below_;
257   }
258   void set_nearest_neighbor_below(ColPartition *part) {
259     nearest_neighbor_below_ = part;
260   }
261   int space_above() const {
262     return space_above_;
263   }
264   void set_space_above(int space) {
265     space_above_ = space;
266   }
267   int space_below() const {
268     return space_below_;
269   }
270   void set_space_below(int space) {
271     space_below_ = space;
272   }
273   int space_to_left() const {
274     return space_to_left_;
275   }
276   void set_space_to_left(int space) {
277     space_to_left_ = space;
278   }
279   int space_to_right() const {
280     return space_to_right_;
281   }
282   void set_space_to_right(int space) {
283     space_to_right_ = space;
284   }
285   uint8_t *color1() {
286     return color1_;
287   }
288   uint8_t *color2() {
289     return color2_;
290   }
291   bool owns_blobs() const {
292     return owns_blobs_;
293   }
294   void set_owns_blobs(bool owns_blobs) {
295     // Do NOT change ownership flag when there are blobs in the list.
296     // Immediately set the ownership flag when creating copies.
297     ASSERT_HOST(boxes_.empty());
298     owns_blobs_ = owns_blobs;
299   }
300 
301   // Inline quasi-accessors that require some computation.
302 
303   // Returns the middle y-coord of the bounding box.
304   int MidY() const {
305     return (bounding_box_.top() + bounding_box_.bottom()) / 2;
306   }
307   // Returns the middle y-coord of the median top and bottom.
308   int MedianY() const {
309     return (median_top_ + median_bottom_) / 2;
310   }
311   // Returns the middle x-coord of the bounding box.
312   int MidX() const {
313     return (bounding_box_.left() + bounding_box_.right()) / 2;
314   }
315   // Returns the sort key at any given x,y.
316   int SortKey(int x, int y) const {
317     return TabVector::SortKey(vertical_, x, y);
318   }
319   // Returns the x corresponding to the sortkey, y pair.
320   int XAtY(int sort_key, int y) const {
321     return TabVector::XAtY(vertical_, sort_key, y);
322   }
323   // Returns the x difference between the two sort keys.
324   int KeyWidth(int left_key, int right_key) const {
325     return (right_key - left_key) / vertical_.y();
326   }
327   // Returns the column width between the left and right keys.
328   int ColumnWidth() const {
329     return KeyWidth(left_key_, right_key_);
330   }
331   // Returns the sort key of the box left edge.
332   int BoxLeftKey() const {
333     return SortKey(bounding_box_.left(), MidY());
334   }
335   // Returns the sort key of the box right edge.
336   int BoxRightKey() const {
337     return SortKey(bounding_box_.right(), MidY());
338   }
339   // Returns the left edge at the given y, using the sort key.
340   int LeftAtY(int y) const {
341     return XAtY(left_key_, y);
342   }
343   // Returns the right edge at the given y, using the sort key.
344   int RightAtY(int y) const {
345     return XAtY(right_key_, y);
346   }
347   // Returns true if the right edge of this is to the left of the right
348   // edge of other.
349   bool IsLeftOf(const ColPartition &other) const {
350     return bounding_box_.right() < other.bounding_box_.right();
351   }
352   // Returns true if the partition contains the given x coordinate at the y.
353   bool ColumnContains(int x, int y) const {
354     return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1;
355   }
356   // Returns true if there are no blobs in the list.
357   bool IsEmpty() const {
358     return boxes_.empty();
359   }
360   // Returns true if there is a single blob in the list.
361   bool IsSingleton() const {
362     return boxes_.singleton();
363   }
364   // Returns true if this and other overlap horizontally by bounding box.
365   bool HOverlaps(const ColPartition &other) const {
366     return bounding_box_.x_overlap(other.bounding_box_);
367   }
368   // Returns true if this and other's bounding boxes overlap vertically.
369   // TODO(rays) Make HOverlaps and VOverlaps truly symmetric.
370   bool VOverlaps(const ColPartition &other) const {
371     return bounding_box_.y_gap(other.bounding_box_) < 0;
372   }
373   // Returns the vertical overlap (by median) of this and other.
374   // WARNING! Only makes sense on horizontal partitions!
375   int VCoreOverlap(const ColPartition &other) const {
376     if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
377       return 0;
378     }
379     return std::min(median_top_, other.median_top_) -
380            std::max(median_bottom_, other.median_bottom_);
381   }
382   // Returns the horizontal overlap (by median) of this and other.
383   // WARNING! Only makes sense on vertical partitions!
384   int HCoreOverlap(const ColPartition &other) const {
385     return std::min(median_right_, other.median_right_) -
386            std::max(median_left_, other.median_left_);
387   }
388   // Returns true if this and other overlap significantly vertically.
389   // WARNING! Only makes sense on horizontal partitions!
390   bool VSignificantCoreOverlap(const ColPartition &other) const {
391     if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
392       return false;
393     }
394     int overlap = VCoreOverlap(other);
395     int height = std::min(median_top_ - median_bottom_,
396                           other.median_top_ - other.median_bottom_);
397     return overlap * 3 > height;
398   }
399   // Returns true if this and other can be combined without putting a
400   // horizontal step in either left or right edge of the resulting block.
401   bool WithinSameMargins(const ColPartition &other) const {
402     return left_margin_ <= other.bounding_box_.left() &&
403            bounding_box_.left() >= other.left_margin_ &&
404            bounding_box_.right() <= other.right_margin_ &&
405            right_margin_ >= other.bounding_box_.right();
406   }
407   // Returns true if the region types (aligned_text_) match.
408   // Lines never match anything, as they should never be merged or chained.
409   bool TypesMatch(const ColPartition &other) const {
410     return TypesMatch(blob_type_, other.blob_type_);
411   }
412   static bool TypesMatch(BlobRegionType type1, BlobRegionType type2) {
413     return (type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN) &&
414            !BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2);
415   }
416 
417   // Returns true if the types are similar to each other.
418   static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2) {
419     return (type1 == type2 ||
420             (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
421             (type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION));
422   }
423 
424   // Returns true if partitions is of horizontal line type
425   bool IsLineType() const {
426     return PTIsLineType(type_);
427   }
428   // Returns true if partitions is of image type
429   bool IsImageType() const {
430     return PTIsImageType(type_);
431   }
432   // Returns true if partitions is of text type
433   bool IsTextType() const {
434     return PTIsTextType(type_);
435   }
436   // Returns true if partitions is of pullout(inter-column) type
437   bool IsPulloutType() const {
438     return PTIsPulloutType(type_);
439   }
440   // Returns true if the partition is of an exclusively vertical type.
441   bool IsVerticalType() const {
442     return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE;
443   }
444   // Returns true if the partition is of a definite horizontal type.
445   bool IsHorizontalType() const {
446     return blob_type_ == BRT_TEXT || blob_type_ == BRT_HLINE;
447   }
448   // Returns true is the partition is of a type that cannot be merged.
449   bool IsUnMergeableType() const {
450     return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE;
451   }
452   // Returns true if this partition is a vertical line
453   // TODO(nbeato): Use PartitionType enum when Ray's code is submitted.
454   bool IsVerticalLine() const {
455     return IsVerticalType() && IsLineType();
456   }
457   // Returns true if this partition is a horizontal line
458   // TODO(nbeato): Use PartitionType enum when Ray's code is submitted.
459   bool IsHorizontalLine() const {
460     return IsHorizontalType() && IsLineType();
461   }
462 
463   // Adds the given box to the partition, updating the partition bounds.
464   // The list of boxes in the partition is updated, ensuring that no box is
465   // recorded twice, and the boxes are kept in increasing left position.
466   void AddBox(BLOBNBOX *box);
467 
468   // Removes the given box from the partition, updating the bounds.
469   void RemoveBox(BLOBNBOX *box);
470 
471   // Returns the tallest box in the partition, as measured perpendicular to the
472   // presumed flow of text.
473   BLOBNBOX *BiggestBox();
474 
475   // Returns the bounding box excluding the given box.
476   TBOX BoundsWithoutBox(BLOBNBOX *box);
477 
478   // Claims the boxes in the boxes_list by marking them with a this owner
479   // pointer.
480   void ClaimBoxes();
481 
482   // nullptr the owner of the blobs in this partition, so they can be deleted
483   // independently of the ColPartition.
484   void DisownBoxes();
485   // nullptr the owner of the blobs in this partition that are owned by this
486   // partition, so they can be deleted independently of the ColPartition.
487   // Any blobs that are not owned by this partition get to keep their owner
488   // without an assert failure.
489   void DisownBoxesNoAssert();
490   // Nulls the owner of the blobs in this partition that are owned by this
491   // partition and not leader blobs, removing them from the boxes_ list, thus
492   // turning this partition back to a leader partition if it contains a leader,
493   // or otherwise leaving it empty. Returns true if any boxes remain.
494   bool ReleaseNonLeaderBoxes();
495 
496   // Delete the boxes that this partition owns.
497   void DeleteBoxes();
498 
499   // Reflects the partition in the y-axis, assuming that its blobs have
500   // already been done. Corrects only a limited part of the members, since
501   // this function is assumed to be used shortly after initial creation, which
502   // is before a lot of the members are used.
503   void ReflectInYAxis();
504 
505   // Returns true if this is a legal partition - meaning that the conditions
506   // left_margin <= bounding_box left
507   // left_key <= bounding box left key
508   // bounding box left <= bounding box right
509   // and likewise for right margin and key
510   // are all met.
511   bool IsLegal();
512 
513   // Returns true if the left and right edges are approximately equal.
514   bool MatchingColumns(const ColPartition &other) const;
515 
516   // Returns true if the colors match for two text partitions.
517   bool MatchingTextColor(const ColPartition &other) const;
518 
519   // Returns true if the sizes match for two text partitions,
520   // taking orientation into account
521   bool MatchingSizes(const ColPartition &other) const;
522 
523   // Returns true if there is no tabstop violation in merging this and other.
524   bool ConfirmNoTabViolation(const ColPartition &other) const;
525 
526   // Returns true if other has a similar stroke width to this.
527   bool MatchingStrokeWidth(const ColPartition &other,
528                            double fractional_tolerance,
529                            double constant_tolerance) const;
530   // Returns true if candidate is an acceptable diacritic base char merge
531   // with this as the diacritic.
532   bool OKDiacriticMerge(const ColPartition &candidate, bool debug) const;
533 
534   // Sets the sort key using either the tab vector, or the bounding box if
535   // the tab vector is nullptr. If the tab_vector lies inside the bounding_box,
536   // use the edge of the box as a key any way.
537   void SetLeftTab(const TabVector *tab_vector);
538   void SetRightTab(const TabVector *tab_vector);
539 
540   // Copies the left/right tab from the src partition, but if take_box is
541   // true, copies the box instead and uses that as a key.
542   void CopyLeftTab(const ColPartition &src, bool take_box);
543   void CopyRightTab(const ColPartition &src, bool take_box);
544 
545   // Returns the left rule line x coord of the leftmost blob.
546   int LeftBlobRule() const;
547   // Returns the right rule line x coord of the rightmost blob.
548   int RightBlobRule() const;
549 
550   // Returns the density value for a particular BlobSpecialTextType.
551   float SpecialBlobsDensity(const BlobSpecialTextType type) const;
552   // Returns the number of blobs for a  particular BlobSpecialTextType.
553   int SpecialBlobsCount(const BlobSpecialTextType type);
554   // Set the density value for a particular BlobSpecialTextType, should ONLY be
555   // used for debugging or testing. In production code, use
556   // ComputeSpecialBlobsDensity instead.
557   void SetSpecialBlobsDensity(const BlobSpecialTextType type,
558                               const float density);
559   // Compute the SpecialTextType density of blobs, where we assume
560   // that the SpecialTextType in the boxes_ has been set.
561   void ComputeSpecialBlobsDensity();
562 
563   // Add a partner above if upper, otherwise below.
564   // Add them uniquely and keep the list sorted by box left.
565   // Partnerships are added symmetrically to partner and this.
566   void AddPartner(bool upper, ColPartition *partner);
567   // Removes the partner from this, but does not remove this from partner.
568   // This asymmetric removal is so as not to mess up the iterator that is
569   // working on partner's partner list.
570   void RemovePartner(bool upper, ColPartition *partner);
571   // Returns the partner if the given partner is a singleton, otherwise nullptr.
572   ColPartition *SingletonPartner(bool upper);
573 
574   // Merge with the other partition and delete it.
575   void Absorb(ColPartition *other, const WidthCallback &cb);
576 
577   // Returns true if the overlap between this and the merged pair of
578   // merge candidates is sufficiently trivial to be allowed.
579   // The merged box can graze the edge of this by the ok_box_overlap
580   // if that exceeds the margin to the median top and bottom.
581   bool OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2,
582                       int ok_box_overlap, bool debug);
583 
584   // Find the blob at which to split this to minimize the overlap with the
585   // given box. Returns the first blob to go in the second partition.
586   BLOBNBOX *OverlapSplitBlob(const TBOX &box);
587 
588   // Split this partition keeping the first half in this and returning
589   // the second half.
590   // Splits by putting the split_blob and the blobs that follow
591   // in the second half, and the rest in the first half.
592   ColPartition *SplitAtBlob(BLOBNBOX *split_blob);
593 
594   // Splits this partition at the given x coordinate, returning the right
595   // half and keeping the left half in this.
596   ColPartition *SplitAt(int split_x);
597 
598   // Recalculates all the coordinate limits of the partition.
599   void ComputeLimits();
600 
601   // Returns the number of boxes that overlap the given box.
602   int CountOverlappingBoxes(const TBOX &box);
603 
604   // Computes and sets the type_, first_column_, last_column_ and column_set_.
605   // resolution refers to the ppi resolution of the image.
606   void SetPartitionType(int resolution, ColPartitionSet *columns);
607 
608   // Returns the PartitionType from the current BlobRegionType and a column
609   // flow spanning type ColumnSpanningType, generated by
610   // ColPartitionSet::SpanningType, that indicates how the partition sits
611   // in the columns.
612   PolyBlockType PartitionType(ColumnSpanningType flow) const;
613 
614   // Returns the first and last column touched by this partition.
615   // resolution refers to the ppi resolution of the image.
616   void ColumnRange(int resolution, ColPartitionSet *columns, int *first_col,
617                    int *last_col);
618 
619   // Sets the internal flags good_width_ and good_column_.
620   void SetColumnGoodness(const WidthCallback &cb);
621 
622   // Determines whether the blobs in this partition mostly represent
623   // a leader (fixed pitch sequence) and sets the member blobs accordingly.
624   // Note that height is assumed to have been tested elsewhere, and that this
625   // function will find most fixed-pitch text as leader without a height filter.
626   // Leader detection is limited to sequences of identical width objects,
627   // such as .... or ----, so patterns, such as .-.-.-.-. will not be found.
628   bool MarkAsLeaderIfMonospaced();
629   // Given the result of TextlineProjection::EvaluateColPartition, (positive for
630   // horizontal text, negative for vertical text, and near zero for non-text),
631   // sets the blob_type_ and flow_ for this partition to indicate whether it
632   // is strongly or weakly vertical or horizontal text, or non-text.
633   void SetRegionAndFlowTypesFromProjectionValue(int value);
634 
635   // Sets all blobs with the partition blob type and flow, but never overwrite
636   // leader blobs, as we need to be able to identify them later.
637   void SetBlobTypes();
638 
639   // Returns true if a decent baseline can be fitted through the blobs.
640   // Works for both horizontal and vertical text.
641   bool HasGoodBaseline();
642 
643   // Adds this ColPartition to a matching WorkingPartSet if one can be found,
644   // otherwise starts a new one in the appropriate column, ending the previous.
645   void AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright,
646                        int resolution, ColPartition_LIST *used_parts,
647                        WorkingPartSet_LIST *working_set);
648 
649   // From the given block_parts list, builds one or more BLOCKs and
650   // corresponding TO_BLOCKs, such that the line spacing is uniform in each.
651   // Created blocks are appended to the end of completed_blocks and to_blocks.
652   // The used partitions are put onto used_parts, as they may still be referred
653   // to in the partition grid. bleft, tright and resolution are the bounds
654   // and resolution of the original image.
655   static void LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright,
656                                 int resolution, ColPartition_LIST *block_parts,
657                                 ColPartition_LIST *used_parts,
658                                 BLOCK_LIST *completed_blocks,
659                                 TO_BLOCK_LIST *to_blocks);
660   // Constructs a block from the given list of partitions.
661   // Arguments are as LineSpacingBlocks above.
662   static TO_BLOCK *MakeBlock(const ICOORD &bleft, const ICOORD &tright,
663                              ColPartition_LIST *block_parts,
664                              ColPartition_LIST *used_parts);
665 
666   // Constructs a block from the given list of vertical text partitions.
667   // Currently only creates rectangular blocks.
668   static TO_BLOCK *MakeVerticalTextBlock(const ICOORD &bleft,
669                                          const ICOORD &tright,
670                                          ColPartition_LIST *block_parts,
671                                          ColPartition_LIST *used_parts);
672 
673   // Makes a TO_ROW matching this and moves all the blobs to it, transferring
674   // ownership to to returned TO_ROW.
675   TO_ROW *MakeToRow();
676 
677   // Returns a copy of everything except the list of boxes. The resulting
678   // ColPartition is only suitable for keeping in a column candidate list.
679   ColPartition *ShallowCopy() const;
680   // Returns a copy of everything with a shallow copy of the blobs.
681   // The blobs are still owned by their original parent, so they are
682   // treated as read-only.
683   ColPartition *CopyButDontOwnBlobs();
684 
685 #ifndef GRAPHICS_DISABLED
686   // Provides a color for BBGrid to draw the rectangle.
687   ScrollView::Color BoxColor() const;
688 #endif // !GRAPHICS_DISABLED
689 
690   // Prints debug information on this.
691   void Print() const;
692   // Prints debug information on the colors.
693   void PrintColors();
694 
695   // Sets the types of all partitions in the run to be the max of the types.
696   void SmoothPartnerRun(int working_set_count);
697 
698   // Cleans up the partners of the given type so that there is at most
699   // one partner. This makes block creation simpler.
700   // If get_desperate is true, goes to more desperate merge methods
701   // to merge flowing text before breaking partnerships.
702   void RefinePartners(PolyBlockType type, bool get_desperate,
703                       ColPartitionGrid *grid);
704 
705   // Returns true if this column partition is in the same column as
706   // part. This function will only work after the SetPartitionType function
707   // has been called on both column partitions. This is useful for
708   // doing a SideSearch when you want things in the same page column.
709   bool IsInSameColumnAs(const ColPartition &part) const;
710 
711   // Sort function to sort by bounding box.
712   static int SortByBBox(const void *p1, const void *p2) {
713     const ColPartition *part1 = *static_cast<const ColPartition *const *>(p1);
714     const ColPartition *part2 = *static_cast<const ColPartition *const *>(p2);
715     int mid_y1 = part1->bounding_box_.y_middle();
716     int mid_y2 = part2->bounding_box_.y_middle();
717     if ((part2->bounding_box_.bottom() <= mid_y1 &&
718          mid_y1 <= part2->bounding_box_.top()) ||
719         (part1->bounding_box_.bottom() <= mid_y2 &&
720          mid_y2 <= part1->bounding_box_.top())) {
721       // Sort by increasing x.
722       return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle();
723     }
724     // Sort by decreasing y.
725     return mid_y2 - mid_y1;
726   }
727 
728   // Sets the column bounds. Primarily used in testing.
729   void set_first_column(int column) {
730     first_column_ = column;
731   }
732   void set_last_column(int column) {
733     last_column_ = column;
734   }
735 
736 private:
737   // Cleans up the partners above if upper is true, else below.
738   // If get_desperate is true, goes to more desperate merge methods
739   // to merge flowing text before breaking partnerships.
740   void RefinePartnersInternal(bool upper, bool get_desperate,
741                               ColPartitionGrid *grid);
742   // Restricts the partners to only desirable types. For text and BRT_HLINE this
743   // means the same type_ , and for image types it means any image type.
744   void RefinePartnersByType(bool upper, ColPartition_CLIST *partners);
745   // Remove transitive partnerships: this<->a, and a<->b and this<->b.
746   // Gets rid of this<->b, leaving a clean chain.
747   // Also if we have this<->a and a<->this, then gets rid of this<->a, as
748   // this has multiple partners.
749   void RefinePartnerShortcuts(bool upper, ColPartition_CLIST *partners);
750   // If multiple text partners can be merged, then do so.
751   // If desperate is true, then an increase in overlap with the merge is
752   // allowed. If the overlap increases, then the desperately_merged_ flag
753   // is set, indicating that the textlines probably need to be regenerated
754   // by aggressive line fitting/splitting, as there are probably vertically
755   // joined blobs that cross textlines.
756   void RefineTextPartnersByMerge(bool upper, bool desperate,
757                                  ColPartition_CLIST *partners,
758                                  ColPartitionGrid *grid);
759   // Keep the partner with the biggest overlap.
760   void RefinePartnersByOverlap(bool upper, ColPartition_CLIST *partners);
761 
762   // Return true if bbox belongs better in this than other.
763   bool ThisPartitionBetter(BLOBNBOX *bbox, const ColPartition &other);
764 
765   // Smoothes the spacings in the list into groups of equal linespacing.
766   // resolution is the resolution of the original image, used as a basis
767   // for thresholds in change of spacing. page_height is in pixels.
768   static void SmoothSpacings(int resolution, int page_height,
769                              ColPartition_LIST *parts);
770 
771   // Returns true if the parts array of pointers to partitions matches the
772   // condition for a spacing blip. See SmoothSpacings for what this means
773   // and how it is used.
774   static bool OKSpacingBlip(int resolution, int median_spacing,
775                             ColPartition **parts, int offset);
776 
777   // Returns true if both the top and bottom spacings of this match the given
778   // spacing to within suitable margins dictated by the image resolution.
779   bool SpacingEqual(int spacing, int resolution) const;
780 
781   // Returns true if both the top and bottom spacings of this and other
782   // match to within suitable margins dictated by the image resolution.
783   bool SpacingsEqual(const ColPartition &other, int resolution) const;
784 
785   // Returns true if the sum spacing of this and other match the given
786   // spacing (or twice the given spacing) to within a suitable margin dictated
787   // by the image resolution.
788   bool SummedSpacingOK(const ColPartition &other, int spacing,
789                        int resolution) const;
790 
791   // Returns a suitable spacing margin that can be applied to bottoms of
792   // text lines, based on the resolution and the stored side_step_.
793   int BottomSpacingMargin(int resolution) const;
794 
795   // Returns a suitable spacing margin that can be applied to tops of
796   // text lines, based on the resolution and the stored side_step_.
797   int TopSpacingMargin(int resolution) const;
798 
799   // Returns true if the median text sizes of this and other agree to within
800   // a reasonable multiplicative factor.
801   bool SizesSimilar(const ColPartition &other) const;
802 
803   // Computes and returns in start, end a line segment formed from a
804   // forwards-iterated group of left edges of partitions that satisfy the
805   // condition that the rightmost left margin is to the left of the
806   // leftmost left bounding box edge.
807   // TODO(rays) Not good enough. Needs improving to tightly wrap text in both
808   // directions, and to loosely wrap images.
809   static void LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *end);
810   // Computes and returns in start, end a line segment formed from a
811   // backwards-iterated group of right edges of partitions that satisfy the
812   // condition that the leftmost right margin is to the right of the
813   // rightmost right bounding box edge.
814   // TODO(rays) Not good enough. Needs improving to tightly wrap text in both
815   // directions, and to loosely wrap images.
816   static void RightEdgeRun(ColPartition_IT *part_it, ICOORD *start,
817                            ICOORD *end);
818 
819   // The margins are determined by the position of the nearest vertically
820   // overlapping neighbour to the side. They indicate the maximum extent
821   // that the block/column may be extended without touching something else.
822   // Leftmost coordinate that the region may occupy over the y limits.
823   int left_margin_ = 0;
824   // Rightmost coordinate that the region may occupy over the y limits.
825   int right_margin_ = 0;
826   // Bounding box of all blobs in the partition.
827   TBOX bounding_box_;
828   // Median top and bottom of blobs in this partition.
829   int median_bottom_ = 0;
830   int median_top_ = 0;
831   // Median height of blobs in this partition.
832   int median_height_ = 0;
833   // Median left and right of blobs in this partition.
834   int median_left_ = 0;
835   int median_right_ = 0;
836   // Median width of blobs in this partition.
837   int median_width_ = 0;
838   // blob_region_type_ for the blobs in this partition.
839   BlobRegionType blob_type_ = BRT_UNKNOWN;
840   BlobTextFlowType flow_ = BTFT_NONE; // Quality of text flow.
841   // Total of GoodTextBlob results for all blobs in the partition.
842   int good_blob_score_ = 0;
843   // True if this partition has a common width.
844   bool good_width_ = false;
845   // True if this is a good column candidate.
846   bool good_column_ = false;
847   // True if the left_key_ is from a tab vector.
848   bool left_key_tab_ = false;
849   // True if the right_key_ is from a tab vector.
850   bool right_key_tab_ = false;
851   // Left and right sort keys for the edges of the partition.
852   // If the respective *_key_tab_ is true then this key came from a tab vector.
853   // If not, then the class promises to keep the key equal to the sort key
854   // for the respective edge of the bounding box at the MidY, so that
855   // LeftAtY and RightAtY always returns an x coordinate on the line parallel
856   // to vertical_ through the bounding box edge at MidY.
857   int left_key_ = 0;
858   int right_key_ = 0;
859   // Type of this partition after looking at its relation to the columns.
860   PolyBlockType type_ = PT_UNKNOWN;
861   // The global vertical skew direction.
862   ICOORD vertical_;
863   // All boxes in the partition stored in increasing left edge coordinate.
864   BLOBNBOX_CLIST boxes_;
865   // The partitions above that matched this.
866   ColPartition_CLIST upper_partners_;
867   // The partitions below that matched this.
868   ColPartition_CLIST lower_partners_;
869   // The WorkingPartSet it lives in while blocks are being made.
870   WorkingPartSet *working_set_ = nullptr;
871   // Column_set_ is the column layout applicable to this ColPartition.
872   ColPartitionSet *column_set_ = nullptr;
873   // Flag is true when AddBox is sorting vertically, false otherwise.
874   bool last_add_was_vertical_ = false;
875   // True when the partition's ownership has been taken from the grid and
876   // placed in a working set, or, after that, in the good_parts_ list.
877   bool block_owned_ = false;
878   // Flag to indicate that this partition was subjected to a desperate merge,
879   // and therefore the textlines need rebuilding.
880   bool desperately_merged_ = false;
881   bool owns_blobs_ = true; // Does the partition own its blobs?
882   // The first and last column that this partition applies to.
883   // Flowing partitions (see type_) will have an equal first and last value
884   // of the form 2n + 1, where n is the zero-based index into the partitions
885   // in column_set_. (See ColPartitionSet::GetColumnByIndex).
886   // Heading partitions will have unequal values of the same form.
887   // Pullout partitions will have equal values, but may have even values,
888   // indicating placement between columns.
889   int first_column_ = -1;
890   int last_column_ = -1;
891   // Linespacing data.
892   int side_step_ = 0;      // Median y-shift to next blob on same line.
893   int top_spacing_ = 0;    // Line spacing from median_top_.
894   int bottom_spacing_ = 0; // Line spacing from median_bottom_.
895 
896   // Nearest neighbor above with major x-overlap
897   ColPartition *nearest_neighbor_above_ = nullptr;
898   // Nearest neighbor below with major x-overlap
899   ColPartition *nearest_neighbor_below_ = nullptr;
900   int space_above_ = 0;    // Distance from nearest_neighbor_above
901   int space_below_ = 0;    // Distance from nearest_neighbor_below
902   int space_to_left_ = 0;  // Distance from the left edge of the column
903   int space_to_right_ = 0; // Distance from the right edge of the column
904   // Color foreground/background data.
905   uint8_t color1_[kRGBRMSColors];
906   uint8_t color2_[kRGBRMSColors];
907   // The density of special blobs.
908   float special_blobs_densities_[BSTT_COUNT];
909   // Type of this partition before considering it as a table cell. This is
910   // used to revert the type if a partition is first marked as a table cell but
911   // later filtering steps decide it does not belong to a table
912   PolyBlockType type_before_table_ = PT_UNKNOWN;
913   // Check whether the current partition has been assigned to a table column.
914   bool inside_table_column_ = false;
915 };
916 
917 // Typedef it now in case it becomes a class later.
918 using ColPartitionGridSearch =
919     GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>;
920 
921 } // namespace tesseract.
922 
923 #endif // TESSERACT_TEXTORD_COLPARTITION_H_
924