1 ///////////////////////////////////////////////////////////////////////
2 // File:        boxword.cpp
3 // Description: Class to represent the bounding boxes of the output.
4 // Author:      Ray Smith
5 //
6 // (C) Copyright 2010, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18 
19 #include "boxword.h"
20 #include "blobs.h"
21 #include "host.h" // for NearlyEqual
22 #include "normalis.h"
23 #include "ocrblock.h"
24 #include "pageres.h"
25 
26 namespace tesseract {
27 
// Largest allowed difference between an edge of an output box and the
// corresponding edge of the matching input blob box for the output edge to
// be snapped to the blob edge. With a larger difference the blob may have
// been chopped, so only the word bounding box can be used as a limit.
constexpr int kBoxClipTolerance = 2;
32 
BoxWord()33 BoxWord::BoxWord() : length_(0) {}
34 
// Copy constructor: initialises this word as a duplicate of src by
// delegating all of the work to CopyFrom.
BoxWord::BoxWord(const BoxWord &src) { CopyFrom(src); }
38 
// Copy assignment: replaces the contents of this word with a copy of src
// (delegating to CopyFrom) and returns a reference to this word so that
// assignments can be chained.
BoxWord &BoxWord::operator=(const BoxWord &src) {
  CopyFrom(src);
  BoxWord &self = *this;
  return self;
}
43 
CopyFrom(const BoxWord & src)44 void BoxWord::CopyFrom(const BoxWord &src) {
45   bbox_ = src.bbox_;
46   length_ = src.length_;
47   boxes_.clear();
48   boxes_.reserve(length_);
49   for (unsigned i = 0; i < length_; ++i) {
50     boxes_.push_back(src.boxes_[i]);
51   }
52 }
53 
54 // Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
55 // switch back to original image coordinates.
CopyFromNormalized(TWERD * tessword)56 BoxWord *BoxWord::CopyFromNormalized(TWERD *tessword) {
57   auto *boxword = new BoxWord();
58   // Count the blobs.
59   boxword->length_ = tessword->NumBlobs();
60   // Allocate memory.
61   boxword->boxes_.reserve(boxword->length_);
62 
63   for (unsigned b = 0; b < boxword->length_; ++b) {
64     TBLOB *tblob = tessword->blobs[b];
65     TBOX blob_box;
66     for (TESSLINE *outline = tblob->outlines; outline != nullptr;
67          outline = outline->next) {
68       EDGEPT *edgept = outline->loop;
69       // Iterate over the edges.
70       do {
71         if (!edgept->IsHidden() || !edgept->prev->IsHidden()) {
72           ICOORD pos(edgept->pos.x, edgept->pos.y);
73           TPOINT denormed;
74           tblob->denorm().DenormTransform(nullptr, edgept->pos, &denormed);
75           pos.set_x(denormed.x);
76           pos.set_y(denormed.y);
77           TBOX pt_box(pos, pos);
78           blob_box += pt_box;
79         }
80         edgept = edgept->next;
81       } while (edgept != outline->loop);
82     }
83     boxword->boxes_.push_back(blob_box);
84   }
85   boxword->ComputeBoundingBox();
86   return boxword;
87 }
88 
89 // Clean up the bounding boxes from the polygonal approximation by
90 // expanding slightly, then clipping to the blobs from the original_word
91 // that overlap. If not null, the block provides the inverse rotation.
ClipToOriginalWord(const BLOCK * block,WERD * original_word)92 void BoxWord::ClipToOriginalWord(const BLOCK *block, WERD *original_word) {
93   for (unsigned i = 0; i < length_; ++i) {
94     TBOX box = boxes_[i];
95     // Expand by a single pixel, as the poly approximation error is 1 pixel.
96     box =
97         TBOX(box.left() - 1, box.bottom() - 1, box.right() + 1, box.top() + 1);
98     // Now find the original box that matches.
99     TBOX original_box;
100     C_BLOB_IT b_it(original_word->cblob_list());
101     for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
102       TBOX blob_box = b_it.data()->bounding_box();
103       if (block != nullptr) {
104         blob_box.rotate(block->re_rotation());
105       }
106       if (blob_box.major_overlap(box)) {
107         original_box += blob_box;
108       }
109     }
110     if (!original_box.null_box()) {
111       if (NearlyEqual<int>(original_box.left(), box.left(),
112                            kBoxClipTolerance)) {
113         box.set_left(original_box.left());
114       }
115       if (NearlyEqual<int>(original_box.right(), box.right(),
116                            kBoxClipTolerance)) {
117         box.set_right(original_box.right());
118       }
119       if (NearlyEqual<int>(original_box.top(), box.top(), kBoxClipTolerance)) {
120         box.set_top(original_box.top());
121       }
122       if (NearlyEqual<int>(original_box.bottom(), box.bottom(),
123                            kBoxClipTolerance)) {
124         box.set_bottom(original_box.bottom());
125       }
126     }
127     original_box = original_word->bounding_box();
128     if (block != nullptr) {
129       original_box.rotate(block->re_rotation());
130     }
131     boxes_[i] = box.intersection(original_box);
132   }
133   ComputeBoundingBox();
134 }
135 
136 // Merges the boxes from start to end, not including end, and deletes
137 // the boxes between start and end.
MergeBoxes(unsigned start,unsigned end)138 void BoxWord::MergeBoxes(unsigned start, unsigned end) {
139   start = ClipToRange(start, 0U, length_);
140   end = ClipToRange(end, 0U, length_);
141   if (end <= start + 1) {
142     return;
143   }
144   for (unsigned i = start + 1; i < end; ++i) {
145     boxes_[start] += boxes_[i];
146   }
147   int shrinkage = end - 1 - start;
148   length_ -= shrinkage;
149   for (unsigned i = start + 1; i < length_; ++i) {
150     boxes_[i] = boxes_[i + shrinkage];
151   }
152   boxes_.resize(length_);
153 }
154 
155 // Inserts a new box before the given index.
156 // Recomputes the bounding box.
InsertBox(unsigned index,const TBOX & box)157 void BoxWord::InsertBox(unsigned index, const TBOX &box) {
158   if (index < length_) {
159     boxes_.insert(boxes_.begin() + index, box);
160   } else {
161     boxes_.push_back(box);
162   }
163   length_ = boxes_.size();
164   ComputeBoundingBox();
165 }
166 
167 // Changes the box at the given index to the new box.
168 // Recomputes the bounding box.
ChangeBox(unsigned index,const TBOX & box)169 void BoxWord::ChangeBox(unsigned index, const TBOX &box) {
170   boxes_[index] = box;
171   ComputeBoundingBox();
172 }
173 
174 // Deletes the box with the given index, and shuffles up the rest.
175 // Recomputes the bounding box.
DeleteBox(unsigned index)176 void BoxWord::DeleteBox(unsigned index) {
177   ASSERT_HOST(index < length_);
178   boxes_.erase(boxes_.begin() + index);
179   --length_;
180   ComputeBoundingBox();
181 }
182 
183 // Deletes all the boxes stored in BoxWord.
DeleteAllBoxes()184 void BoxWord::DeleteAllBoxes() {
185   length_ = 0;
186   boxes_.clear();
187   bbox_ = TBOX();
188 }
189 
190 // Computes the bounding box of the word.
ComputeBoundingBox()191 void BoxWord::ComputeBoundingBox() {
192   bbox_ = TBOX();
193   for (unsigned i = 0; i < length_; ++i) {
194     bbox_ += boxes_[i];
195   }
196 }
197 
198 // This and other putatively are the same, so call the (permanent) callback
199 // for each blob index where the bounding boxes match.
200 // The callback is deleted on completion.
ProcessMatchedBlobs(const TWERD & other,const std::function<void (int)> & cb) const201 void BoxWord::ProcessMatchedBlobs(const TWERD &other,
202                                   const std::function<void(int)> &cb) const {
203   for (unsigned i = 0; i < length_ && i < other.NumBlobs(); ++i) {
204     TBOX blob_box = other.blobs[i]->bounding_box();
205     if (blob_box == boxes_[i]) {
206       cb(i);
207     }
208   }
209 }
210 
211 } // namespace tesseract.
212