1 ///////////////////////////////////////////////////////////////////////
2 // File: tabvector.h
3 // Description: Class to hold a near-vertical vector representing a tab-stop.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2008, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18
19 #ifndef TESSERACT_TEXTORD_TABVECTOR_H_
20 #define TESSERACT_TEXTORD_TABVECTOR_H_
21
22 #include "bbgrid.h"
23 #include "blobgrid.h"
24 #include "clst.h"
25 #include "elst.h"
26 #include "elst2.h"
27 #include "rect.h"
28
29 #include <algorithm>
30
31 class BLOBNBOX;
32 class ScrollView;
33
34 namespace tesseract {
35
36 extern double_VAR_H(textord_tabvector_vertical_gap_fraction);
37 extern double_VAR_H(textord_tabvector_vertical_box_ratio);
38
39 // The alignment type that a tab vector represents.
40 // Keep this enum synced with kAlignmentNames in tabvector.cpp.
41 enum TabAlignment {
42 TA_LEFT_ALIGNED,
43 TA_LEFT_RAGGED,
44 TA_CENTER_JUSTIFIED,
45 TA_RIGHT_ALIGNED,
46 TA_RIGHT_RAGGED,
47 TA_SEPARATOR,
48 TA_COUNT
49 };
50
51 // Forward declarations. The classes use their own list types, so we
52 // need to make the list types first.
53 class TabFind;
54 class TabVector;
55 class TabConstraint;
56
57 ELIST2IZEH(TabVector)
CLISTIZEH(TabVector)58 CLISTIZEH(TabVector)
59 ELISTIZEH(TabConstraint)
60
61 // TabConstraint is a totally self-contained class to maintain
62 // a list of [min,max] constraints, each referring to a TabVector.
63 // The constraints are manipulated through static methods that act
64 // on a list of constraints. The list itself is cooperatively owned
65 // by the TabVectors of the constraints on the list and managed
66 // by implicit reference counting via the elements of the list.
67 class TabConstraint : public ELIST_LINK {
68 public:
69 // This empty constructor is here only so that the class can be ELISTIZED.
70 // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
71 // and eliminate CLASSNAME##_copier.
72 TabConstraint() = default;
73
74 // Create a constraint for the top or bottom of this TabVector.
75 static void CreateConstraint(TabVector *vector, bool is_top);
76
77 // Test to see if the constraints are compatible enough to merge.
78 static bool CompatibleConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2);
79
80 // Merge the lists of constraints and update the TabVector pointers.
81 // The second list is deleted.
82 static void MergeConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2);
83
84 // Set all the tops and bottoms as appropriate to a mean of the
85 // constrained range. Delete all the constraints and list.
86 static void ApplyConstraints(TabConstraint_LIST *constraints);
87
88 private:
89 TabConstraint(TabVector *vector, bool is_top);
90
91 // Get the max of the mins and the min of the maxes.
92 static void GetConstraints(TabConstraint_LIST *constraints, int *y_min, int *y_max);
93
94 // The TabVector this constraint applies to.
95 TabVector *vector_;
96 // If true then we refer to the top of the vector_.
97 bool is_top_;
98 // The allowed range of this vector_.
99 int y_min_;
100 int y_max_;
101 };
102
103 // Class to hold information about a single vector
104 // that represents a tab stop or a rule line.
105 class TabVector : public ELIST2_LINK {
106 public:
107 // TODO(rays) fix this in elst.h line 1076, where it should use the
108 // copy constructor instead of operator=.
109 TabVector() = default;
110 ~TabVector() = default;
111
112 // Public factory to build a TabVector from a list of boxes.
113 // The TabVector will be of the given alignment type.
114 // The input vertical vector is used in fitting, and the output
115 // vertical_x, vertical_y have the resulting line vector added to them
116 // if the alignment is not ragged.
117 // The extended_start_y and extended_end_y are the maximum possible
118 // extension to the line segment that can be used to align with others.
119 // The input CLIST of BLOBNBOX good_points is consumed and taken over.
120 static TabVector *FitVector(TabAlignment alignment, ICOORD vertical, int extended_start_y,
121 int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x,
122 int *vertical_y);
123
124 // Build a ragged TabVector by copying another's direction, shifting it
125 // to match the given blob, and making its initial extent the height
126 // of the blob, but its extended bounds from the bounds of the original.
127 TabVector(const TabVector &src, TabAlignment alignment, const ICOORD &vertical_skew,
128 BLOBNBOX *blob);
129
130 // Copies basic attributes of a tab vector for simple operations.
131 // Copies things such startpt, endpt, range, width.
132 // Does not copy things such as partners, boxes, or constraints.
133 // This is useful if you only need vector information for processing, such
134 // as in the table detection code.
135 TabVector *ShallowCopy() const;
136
137 // Simple accessors.
startpt()138 const ICOORD &startpt() const {
139 return startpt_;
140 }
endpt()141 const ICOORD &endpt() const {
142 return endpt_;
143 }
extended_ymax()144 int extended_ymax() const {
145 return extended_ymax_;
146 }
extended_ymin()147 int extended_ymin() const {
148 return extended_ymin_;
149 }
sort_key()150 int sort_key() const {
151 return sort_key_;
152 }
mean_width()153 int mean_width() const {
154 return mean_width_;
155 }
set_top_constraints(TabConstraint_LIST * constraints)156 void set_top_constraints(TabConstraint_LIST *constraints) {
157 top_constraints_ = constraints;
158 }
set_bottom_constraints(TabConstraint_LIST * constraints)159 void set_bottom_constraints(TabConstraint_LIST *constraints) {
160 bottom_constraints_ = constraints;
161 }
partners()162 TabVector_CLIST *partners() {
163 return &partners_;
164 }
set_startpt(const ICOORD & start)165 void set_startpt(const ICOORD &start) {
166 startpt_ = start;
167 }
set_endpt(const ICOORD & end)168 void set_endpt(const ICOORD &end) {
169 endpt_ = end;
170 }
intersects_other_lines()171 bool intersects_other_lines() const {
172 return intersects_other_lines_;
173 }
set_intersects_other_lines(bool value)174 void set_intersects_other_lines(bool value) {
175 intersects_other_lines_ = value;
176 }
177
178 // Inline quasi-accessors that require some computation.
179
180 // Compute the x coordinate at the given y coordinate.
XAtY(int y)181 int XAtY(int y) const {
182 int height = endpt_.y() - startpt_.y();
183 if (height != 0) {
184 return (y - startpt_.y()) * (endpt_.x() - startpt_.x()) / height + startpt_.x();
185 } else {
186 return startpt_.x();
187 }
188 }
189
190 // Compute the vertical overlap with the other TabVector.
VOverlap(const TabVector & other)191 int VOverlap(const TabVector &other) const {
192 return std::min(other.endpt_.y(), endpt_.y()) - std::max(other.startpt_.y(), startpt_.y());
193 }
194 // Compute the vertical overlap with the given y bounds.
VOverlap(int top_y,int bottom_y)195 int VOverlap(int top_y, int bottom_y) const {
196 return std::min(top_y, static_cast<int>(endpt_.y())) -
197 std::max(bottom_y, static_cast<int>(startpt_.y()));
198 }
199 // Compute the extended vertical overlap with the given y bounds.
ExtendedOverlap(int top_y,int bottom_y)200 int ExtendedOverlap(int top_y, int bottom_y) const {
201 return std::min(top_y, extended_ymax_) - std::max(bottom_y, extended_ymin_);
202 }
203
204 // Return true if this is a left tab stop, either aligned, or ragged.
IsLeftTab()205 bool IsLeftTab() const {
206 return alignment_ == TA_LEFT_ALIGNED || alignment_ == TA_LEFT_RAGGED;
207 }
208 // Return true if this is a right tab stop, either aligned, or ragged.
IsRightTab()209 bool IsRightTab() const {
210 return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED;
211 }
212 // Return true if this is a separator.
IsSeparator()213 bool IsSeparator() const {
214 return alignment_ == TA_SEPARATOR;
215 }
216 // Return true if this is a center aligned tab stop.
IsCenterTab()217 bool IsCenterTab() const {
218 return alignment_ == TA_CENTER_JUSTIFIED;
219 }
220 // Return true if this is a ragged tab top, either left or right.
IsRagged()221 bool IsRagged() const {
222 return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED;
223 }
224
225 // Return true if this vector is to the left of the other in terms
226 // of sort_key_.
IsLeftOf(const TabVector & other)227 bool IsLeftOf(const TabVector &other) const {
228 return sort_key_ < other.sort_key_;
229 }
230
231 // Return true if the vector has no partners.
Partnerless()232 bool Partnerless() {
233 return partners_.empty();
234 }
235
236 // Return the number of tab boxes in this vector.
BoxCount()237 int BoxCount() {
238 return boxes_.length();
239 }
240
241 // Lock the vector from refits by clearing the boxes_ list.
Freeze()242 void Freeze() {
243 boxes_.shallow_clear();
244 }
245
246 // Flip x and y on the ends so a vector can be created from flipped input.
XYFlip()247 void XYFlip() {
248 int x = startpt_.y();
249 startpt_.set_y(startpt_.x());
250 startpt_.set_x(x);
251 x = endpt_.y();
252 endpt_.set_y(endpt_.x());
253 endpt_.set_x(x);
254 }
255
256 // Reflect the tab vector in the y-axis.
ReflectInYAxis()257 void ReflectInYAxis() {
258 startpt_.set_x(-startpt_.x());
259 endpt_.set_x(-endpt_.x());
260 sort_key_ = -sort_key_;
261 if (alignment_ == TA_LEFT_ALIGNED) {
262 alignment_ = TA_RIGHT_ALIGNED;
263 } else if (alignment_ == TA_RIGHT_ALIGNED) {
264 alignment_ = TA_LEFT_ALIGNED;
265 }
266 if (alignment_ == TA_LEFT_RAGGED) {
267 alignment_ = TA_RIGHT_RAGGED;
268 } else if (alignment_ == TA_RIGHT_RAGGED) {
269 alignment_ = TA_LEFT_RAGGED;
270 }
271 }
272
273 // Separate function to compute the sort key for a given coordinate pair.
SortKey(const ICOORD & vertical,int x,int y)274 static int SortKey(const ICOORD &vertical, int x, int y) {
275 ICOORD pt(x, y);
276 return pt * vertical;
277 }
278
279 // Return the x at the given y for the given sort key.
XAtY(const ICOORD & vertical,int sort_key,int y)280 static int XAtY(const ICOORD &vertical, int sort_key, int y) {
281 if (vertical.y() != 0) {
282 return (vertical.x() * y + sort_key) / vertical.y();
283 } else {
284 return sort_key;
285 }
286 }
287
288 // Sort function for E2LIST::sort to sort by sort_key_.
SortVectorsByKey(const void * v1,const void * v2)289 static int SortVectorsByKey(const void *v1, const void *v2) {
290 const TabVector *tv1 = *static_cast<const TabVector *const *>(v1);
291 const TabVector *tv2 = *static_cast<const TabVector *const *>(v2);
292 return tv1->sort_key_ - tv2->sort_key_;
293 }
294
295 // More complex members.
296
297 // Extend this vector to include the supplied blob if it doesn't
298 // already have it.
299 void ExtendToBox(BLOBNBOX *blob);
300
301 // Set the ycoord of the start and move the xcoord to match.
302 void SetYStart(int start_y);
303 // Set the ycoord of the end and move the xcoord to match.
304 void SetYEnd(int end_y);
305
306 // Rotate the ends by the given vector.
307 void Rotate(const FCOORD &rotation);
308
309 // Setup the initial constraints, being the limits of
310 // the vector and the extended ends.
311 void SetupConstraints();
312
313 // Setup the constraints between the partners of this TabVector.
314 void SetupPartnerConstraints();
315
316 // Setup the constraints between this and its partner.
317 void SetupPartnerConstraints(TabVector *partner);
318
319 // Use the constraints to modify the top and bottom.
320 void ApplyConstraints();
321
322 // Merge close tab vectors of the same side that overlap.
323 static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors,
324 BlobGrid *grid);
325
326 // Return true if this vector is the same side, overlaps, and close
327 // enough to the other to be merged.
328 bool SimilarTo(const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const;
329
330 // Eat the other TabVector into this and delete it.
331 void MergeWith(const ICOORD &vertical, TabVector *other);
332
333 // Add a new element to the list of partner TabVectors.
334 // Partners must be added in order of increasing y coordinate of the text line
335 // that makes them partners.
336 // Groups of identical partners are merged into one.
337 void AddPartner(TabVector *partner);
338
339 // Return true if other is a partner of this.
340 bool IsAPartner(const TabVector *other);
341
342 // Print basic information about this tab vector.
343 void Print(const char *prefix);
344
345 // Print basic information about this tab vector and every box in it.
346 void Debug(const char *prefix);
347
348 // Draw this tabvector in place in the given window.
349 void Display(ScrollView *tab_win);
350
351 // Refit the line and/or re-evaluate the vector if the dirty flags are set.
352 void FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder);
353
354 // Evaluate the vector in terms of coverage of its length by good-looking
355 // box edges. A good looking box is one where its nearest neighbour on the
356 // inside is nearer than half the distance its nearest neighbour on the
357 // outside of the putative column. Bad boxes are removed from the line.
358 // A second pass then further filters boxes by requiring that the gutter
359 // width be a minimum fraction of the mean gutter along the line.
360 void Evaluate(const ICOORD &vertical, TabFind *finder);
361
362 // (Re)Fit a line to the stored points. Returns false if the line
363 // is degenerate. Although the TabVector code mostly doesn't care about the
364 // direction of lines, XAtY would give silly results for a horizontal line.
365 // The class is mostly aimed at use for vertical lines representing
366 // horizontal tab stops.
367 bool Fit(ICOORD vertical, bool force_parallel);
368
369 // Return the partner of this TabVector if the vector qualifies as
370 // being a vertical text line, otherwise nullptr.
371 TabVector *VerticalTextlinePartner();
372
373 // Return the matching tabvector if there is exactly one partner, or
374 // nullptr otherwise. This can be used after matching is done, eg. by
375 // VerticalTextlinePartner(), without checking if the line is vertical.
376 TabVector *GetSinglePartner();
377
378 private:
379 // Constructor is private as the static factory is the external way
380 // to build a TabVector.
381 TabVector(int extended_ymin, int extended_ymax, TabAlignment alignment, BLOBNBOX_CLIST *boxes);
382
383 // Delete this, but first, repoint all the partners to point to
384 // replacement. If replacement is nullptr, then partner relationships
385 // are removed.
386 void Delete(TabVector *replacement);
387
388 private:
389 // The bottom of the tab line.
390 ICOORD startpt_;
391 // The top of the tab line.
392 ICOORD endpt_;
393 // The lowest y that the vector might extend to.
394 int extended_ymin_ = 0;
395 // The highest y that the vector might extend to.
396 int extended_ymax_ = 0;
397 // Perpendicular distance of vector from a given vertical for sorting.
398 int sort_key_ = 0;
399 // Result of Evaluate 0-100. Coverage of line with good boxes.
400 int percent_score_ = 0;
401 // The mean width of the blobs. Meaningful only for separator lines.
402 int mean_width_ = 0;
403 // True if the boxes_ list has been modified, so a refit is needed.
404 bool needs_refit_ = false;
405 // True if a fit has been done, so re-evaluation is needed.
406 bool needs_evaluation_ = false;
407 // True if a separator line intersects at least 2 other lines.
408 bool intersects_other_lines_ = false;
409 // The type of this TabVector.
410 TabAlignment alignment_ = TA_LEFT_ALIGNED;
411 // The list of boxes whose edges are aligned at this TabVector.
412 BLOBNBOX_CLIST boxes_;
413 // List of TabVectors that have a connection with this via a text line.
414 TabVector_CLIST partners_;
415 // Constraints used to resolve the exact location of the top and bottom
416 // of the tab line.
417 TabConstraint_LIST *top_constraints_ = nullptr;
418 TabConstraint_LIST *bottom_constraints_ = nullptr;
419 };
420
421 } // namespace tesseract.
422
423 #endif // TESSERACT_TEXTORD_TABVECTOR_H_
424