1 /////////////////////////////////////////////////////////////////////// 2 // File: colpartitiongrid.h 3 // Description: Class collecting code that acts on a BBGrid of ColPartitions. 4 // Author: Ray Smith 5 // 6 // (C) Copyright 2009, Google Inc. 7 // Licensed under the Apache License, Version 2.0 (the "License"); 8 // you may not use this file except in compliance with the License. 9 // You may obtain a copy of the License at 10 // http://www.apache.org/licenses/LICENSE-2.0 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 // 17 /////////////////////////////////////////////////////////////////////// 18 19 #ifndef TESSERACT_TEXTORD_COLPARTITIONGRID_H_ 20 #define TESSERACT_TEXTORD_COLPARTITIONGRID_H_ 21 22 #include "bbgrid.h" 23 #include "colpartition.h" 24 #include "colpartitionset.h" 25 26 namespace tesseract { 27 28 class TabFind; 29 30 // ColPartitionGrid is a BBGrid of ColPartition. 31 // It collects functions that work on the grid. 32 class TESS_API ColPartitionGrid 33 : public BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT> { 34 public: 35 ColPartitionGrid() = default; 36 ColPartitionGrid(int gridsize, const ICOORD &bleft, const ICOORD &tright); 37 38 ~ColPartitionGrid() override = default; 39 40 // Handles a click event in a display window. 41 void HandleClick(int x, int y) override; 42 43 // Merges ColPartitions in the grid that look like they belong in the same 44 // textline. 45 // For all partitions in the grid, calls the box_cb permanent callback 46 // to compute the search box, searches the box, and if a candidate is found, 47 // calls the confirm_cb to check any more rules. If the confirm_cb returns 48 // true, then the partitions are merged. 49 // Both callbacks are deleted before returning. 50 void Merges(const std::function<bool(ColPartition *, TBOX *)> &box_cb, 51 const std::function<bool(const ColPartition *, 52 const ColPartition *)> &confirm_cb); 53 54 // For the given partition, calls the box_cb permanent callback 55 // to compute the search box, searches the box, and if a candidate is found, 56 // calls the confirm_cb to check any more rules. If the confirm_cb returns 57 // true, then the partitions are merged. 58 // Returns true if the partition is consumed by one or more merges. 59 bool MergePart(const std::function<bool(ColPartition *, TBOX *)> &box_cb, 60 const std::function<bool(const ColPartition *, 61 const ColPartition *)> &confirm_cb, 62 ColPartition *part); 63 64 // Computes and returns the total overlap of all partitions in the grid. 65 // If overlap_grid is non-null, it is filled with a grid that holds empty 66 // partitions representing the union of all overlapped partitions. 67 int ComputeTotalOverlap(ColPartitionGrid **overlap_grid); 68 69 // Finds all the ColPartitions in the grid that overlap with the given 70 // box and returns them SortByBoxLeft(ed) and uniqued in the given list. 71 // Any partition equal to not_this (may be nullptr) is excluded. 72 void FindOverlappingPartitions(const TBOX &box, const ColPartition *not_this, 73 ColPartition_CLIST *parts); 74 75 // Finds and returns the best candidate ColPartition to merge with part, 76 // selected from the candidates list, based on the minimum increase in 77 // pairwise overlap among all the partitions overlapped by the combined box. 78 // If overlap_increase is not nullptr then it returns the increase in overlap 79 // that would result from the merge. 80 // See colpartitiongrid.cpp for a diagram. 81 ColPartition *BestMergeCandidate( 82 const ColPartition *part, ColPartition_CLIST *candidates, bool debug, 83 const std::function<bool(const ColPartition *, const ColPartition *)> 84 &confirm_cb, 85 int *overlap_increase); 86 87 // Split partitions where it reduces overlap between their bounding boxes. 88 // ColPartitions are after all supposed to be a partitioning of the blobs 89 // AND of the space on the page! 90 // Blobs that cause overlaps get removed, put in individual partitions 91 // and added to the big_parts list. They are most likely characters on 92 // 2 textlines that touch, or something big like a dropcap. 93 void SplitOverlappingPartitions(ColPartition_LIST *big_parts); 94 95 // Filters partitions of source_type by looking at local neighbours. 96 // Where a majority of neighbours have a text type, the partitions are 97 // changed to text, where the neighbours have image type, they are changed 98 // to image, and partitions that have no definite neighbourhood type are 99 // left unchanged. 100 // im_box and rerotation are used to map blob coordinates onto the 101 // nontext_map, which is used to prevent the spread of text neighbourhoods 102 // into images. 103 // Returns true if anything was changed. 104 bool GridSmoothNeighbours(BlobTextFlowType source_type, Image nontext_map, 105 const TBOX &im_box, const FCOORD &rerotation); 106 107 // Reflects the grid and its colpartitions in the y-axis, assuming that 108 // all blob boxes have already been done. 109 void ReflectInYAxis(); 110 111 // Rotates the grid and its colpartitions by the given angle, assuming that 112 // all blob boxes have already been done. 113 void Deskew(const FCOORD &deskew); 114 115 // Transforms the grid of partitions to the output blocks, putting each 116 // partition into a separate block. We don't really care about the order, 117 // as we just want to get as much text as possible without trying to organize 118 // it into proper blocks or columns. 119 void ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks); 120 121 // Sets the left and right tabs of the partitions in the grid. 122 void SetTabStops(TabFind *tabgrid); 123 124 // Makes the ColPartSets and puts them in the PartSetVector ready 125 // for finding column bounds. Returns false if no partitions were found. 126 // Each ColPartition in the grid is placed in a single ColPartSet based 127 // on the bottom-left of its bounding box. 128 bool MakeColPartSets(PartSetVector *part_sets); 129 130 // Makes a single ColPartitionSet consisting of a single ColPartition that 131 // represents the total horizontal extent of the significant content on the 132 // page. Used for the single column setting in place of automatic detection. 133 // Returns nullptr if the page is empty of significant content. 134 ColPartitionSet *MakeSingleColumnSet(WidthCallback cb); 135 136 // Mark the BLOBNBOXes in each partition as being owned by that partition. 137 void ClaimBoxes(); 138 139 // Retypes all the blobs referenced by the partitions in the grid. 140 // Image blobs are sliced on the grid boundaries to give the tab finder 141 // a better handle on the edges of the images, and the actual blobs are 142 // returned in the im_blobs list, as they are not owned by the block. 143 void ReTypeBlobs(BLOBNBOX_LIST *im_blobs); 144 145 // The boxes within the partitions have changed (by deskew) so recompute 146 // the bounds of all the partitions and reinsert them into the grid. 147 void RecomputeBounds(int gridsize, const ICOORD &bleft, const ICOORD &tright, 148 const ICOORD &vertical); 149 150 // Improves the margins of the ColPartitions in the grid by calling 151 // FindPartitionMargins on each. 152 void GridFindMargins(ColPartitionSet **best_columns); 153 154 // Improves the margins of the ColPartitions in the list by calling 155 // FindPartitionMargins on each. 156 void ListFindMargins(ColPartitionSet **best_columns, 157 ColPartition_LIST *parts); 158 159 // Deletes all the partitions in the grid after disowning all the blobs. 160 void DeleteParts(); 161 162 // Deletes all the partitions in the grid that are of type BRT_UNKNOWN and 163 // all the blobs in them. 164 void DeleteUnknownParts(TO_BLOCK *block); 165 166 // Deletes all the partitions in the grid that are NOT of flow type 167 // BTFT_LEADER. 168 void DeleteNonLeaderParts(); 169 170 // Finds and marks text partitions that represent figure captions. 171 void FindFigureCaptions(); 172 173 //////// Functions that manipulate ColPartitions in the grid /////// 174 //////// to find chains of partner partitions of the same type. /////// 175 // For every ColPartition in the grid, finds its upper and lower neighbours. 176 void FindPartitionPartners(); 177 // Finds the best partner in the given direction for the given partition. 178 // Stores the result with AddPartner. 179 void FindPartitionPartners(bool upper, ColPartition *part); 180 // Finds the best partner in the given direction for the given partition. 181 // Stores the result with AddPartner. 182 void FindVPartitionPartners(bool to_the_left, ColPartition *part); 183 // For every ColPartition with multiple partners in the grid, reduces the 184 // number of partners to 0 or 1. If get_desperate is true, goes to more 185 // desperate merge methods to merge flowing text before breaking partnerships. 186 void RefinePartitionPartners(bool get_desperate); 187 188 private: 189 // Finds and returns a list of candidate ColPartitions to merge with part. 190 // The candidates must overlap search_box, and when merged must not 191 // overlap any other partitions that are not overlapped by each individually. 192 void FindMergeCandidates(const ColPartition *part, const TBOX &search_box, 193 bool debug, ColPartition_CLIST *candidates); 194 195 // Smoothes the region type/flow type of the given part by looking at local 196 // neighbours and the given image mask. Searches a padded rectangle with the 197 // padding truncated on one size of the part's box in turn for each side, 198 // using the result (if any) that has the least distance to all neighbours 199 // that contribute to the decision. This biases in favor of rectangular 200 // regions without completely enforcing them. 201 // If a good decision cannot be reached, the part is left unchanged. 202 // im_box and rerotation are used to map blob coordinates onto the 203 // nontext_map, which is used to prevent the spread of text neighbourhoods 204 // into images. 205 // Returns true if the partition was changed. 206 bool SmoothRegionType(Image nontext_map, const TBOX &im_box, 207 const FCOORD &rerotation, bool debug, 208 ColPartition *part); 209 // Executes the search for SmoothRegionType in a single direction. 210 // Creates a bounding box that is padded in all directions except direction, 211 // and searches it for other partitions. Finds the nearest collection of 212 // partitions that makes a decisive result (if any) and returns the type 213 // and the distance of the collection. If there are any pixels in the 214 // nontext_map, then the decision is biased towards image. 215 BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction, 216 Image nontext_map, const TBOX &im_box, 217 const FCOORD &rerotation, bool debug, 218 const ColPartition &part, 219 int *best_distance); 220 // Counts the partitions in the given search_box by appending the gap 221 // distance (scaled by dist_scaling) of the part from the base_part to the 222 // vector of the appropriate type for the partition. Prior to return, the 223 // vectors in the dists array are sorted in increasing order. 224 // dists must be an array of vectors of size NPT_COUNT. 225 void AccumulatePartDistances(const ColPartition &base_part, 226 const ICOORD &dist_scaling, 227 const TBOX &search_box, Image nontext_map, 228 const TBOX &im_box, const FCOORD &rerotation, 229 bool debug, std::vector<int> *dists); 230 231 // Improves the margins of the ColPartition by searching for 232 // neighbours that vertically overlap significantly. 233 void FindPartitionMargins(ColPartitionSet *columns, ColPartition *part); 234 235 // Starting at x, and going in the specified direction, up to x_limit, finds 236 // the margin for the given y range by searching sideways, 237 // and ignoring not_this. 238 int FindMargin(int x, bool right_to_left, int x_limit, int y_bottom, 239 int y_top, const ColPartition *not_this); 240 }; 241 242 } // namespace tesseract. 243 244 #endif // TESSERACT_TEXTORD_COLPARTITIONGRID_H_ 245