1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include <memory>
13 
14 #include "colpartition.h"
15 #include "colpartitiongrid.h"
16 #include "tablefind.h"
17 
18 #include "include_gunit.h"
19 
20 namespace tesseract {
21 
22 class TestableTableFinder : public tesseract::TableFinder {
23 public:
24   using TableFinder::GapInXProjection;
25   using TableFinder::HasLeaderAdjacent;
26   using TableFinder::InsertLeaderPartition;
27   using TableFinder::InsertTextPartition;
28   using TableFinder::set_global_median_blob_width;
29   using TableFinder::set_global_median_ledding;
30   using TableFinder::set_global_median_xheight;
31   using TableFinder::SplitAndInsertFragmentedTextPartition;
32 
ExpectPartition(const TBOX & box)33   void ExpectPartition(const TBOX &box) {
34     tesseract::ColPartitionGridSearch gsearch(&fragmented_text_grid_);
35     gsearch.SetUniqueMode(true);
36     gsearch.StartFullSearch();
37     ColPartition *part = nullptr;
38     bool found = false;
39     while ((part = gsearch.NextFullSearch()) != nullptr) {
40       if (part->bounding_box().left() == box.left() &&
41           part->bounding_box().bottom() == box.bottom() &&
42           part->bounding_box().right() == box.right() && part->bounding_box().top() == box.top()) {
43         found = true;
44       }
45     }
46     EXPECT_TRUE(found);
47   }
ExpectPartitionCount(int expected_count)48   void ExpectPartitionCount(int expected_count) {
49     tesseract::ColPartitionGridSearch gsearch(&fragmented_text_grid_);
50     gsearch.SetUniqueMode(true);
51     gsearch.StartFullSearch();
52     ColPartition *part = nullptr;
53     int count = 0;
54     while ((part = gsearch.NextFullSearch()) != nullptr) {
55       ++count;
56     }
57     EXPECT_EQ(expected_count, count);
58   }
59 };
60 
61 class TableFinderTest : public testing::Test {
62 protected:
SetUp()63   void SetUp() override {
64     std::locale::global(std::locale(""));
65     free_boxes_it_.set_to_list(&free_boxes_);
66     finder_ = std::make_unique<TestableTableFinder>();
67     finder_->Init(1, ICOORD(0, 0), ICOORD(500, 500));
68     // gap finding
69     finder_->set_global_median_xheight(5);
70     finder_->set_global_median_blob_width(5);
71   }
72 
TearDown()73   void TearDown() override {
74     if (partition_.get() != nullptr) {
75       partition_->DeleteBoxes();
76     }
77     DeletePartitionListBoxes();
78     finder_.reset(nullptr);
79   }
80 
MakePartition(int x_min,int y_min,int x_max,int y_max)81   void MakePartition(int x_min, int y_min, int x_max, int y_max) {
82     MakePartition(x_min, y_min, x_max, y_max, 0, 0);
83   }
84 
MakePartition(int x_min,int y_min,int x_max,int y_max,int first_column,int last_column)85   void MakePartition(int x_min, int y_min, int x_max, int y_max, int first_column,
86                      int last_column) {
87     if (partition_.get() != nullptr) {
88       partition_->DeleteBoxes();
89     }
90     TBOX box;
91     box.set_to_given_coords(x_min, y_min, x_max, y_max);
92     partition_.reset(ColPartition::FakePartition(box, PT_UNKNOWN, BRT_UNKNOWN, BTFT_NONE));
93     partition_->set_first_column(first_column);
94     partition_->set_last_column(last_column);
95   }
96 
InsertTextPartition(ColPartition * part)97   void InsertTextPartition(ColPartition *part) {
98     finder_->InsertTextPartition(part);
99     free_boxes_it_.add_after_then_move(part);
100   }
101 
InsertLeaderPartition(int x_min,int y_min,int x_max,int y_max)102   void InsertLeaderPartition(int x_min, int y_min, int x_max, int y_max) {
103     InsertLeaderPartition(x_min, y_min, x_max, y_max, 0, 0);
104   }
105 
InsertLeaderPartition(int x_min,int y_min,int x_max,int y_max,int first_column,int last_column)106   void InsertLeaderPartition(int x_min, int y_min, int x_max, int y_max, int first_column,
107                              int last_column) {
108     TBOX box;
109     box.set_to_given_coords(x_min, y_min, x_max, y_max);
110     ColPartition *part =
111         ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_UNKNOWN, BTFT_LEADER);
112     part->set_first_column(first_column);
113     part->set_last_column(last_column);
114     finder_->InsertLeaderPartition(part);
115     free_boxes_it_.add_after_then_move(part);
116   }
117 
DeletePartitionListBoxes()118   void DeletePartitionListBoxes() {
119     for (free_boxes_it_.mark_cycle_pt(); !free_boxes_it_.cycled_list(); free_boxes_it_.forward()) {
120       ColPartition *part = free_boxes_it_.data();
121       part->DeleteBoxes();
122     }
123   }
124 
125   std::unique_ptr<TestableTableFinder> finder_;
126   std::unique_ptr<ColPartition> partition_;
127 
128 private:
129   tesseract::ColPartition_CLIST free_boxes_;
130   tesseract::ColPartition_C_IT free_boxes_it_;
131 };
132 
// A projection in which every entry is 10 is expected to report no gap.
TEST_F(TableFinderTest, GapInXProjectionNoGap) {
  constexpr int kLength = 100;
  int projection[kLength];
  for (int col = 0; col < kLength; ++col) {
    projection[col] = 10;
  }
  EXPECT_FALSE(finder_->GapInXProjection(projection, kLength));
}
140 
// Low values (2) only in the first ten and last ten entries, with 10
// everywhere in between, are expected not to be reported as a gap.
TEST_F(TableFinderTest, GapInXProjectionEdgeGap) {
  constexpr int kLength = 100;
  constexpr int kEdge = 10;
  int projection[kLength];
  for (int col = 0; col < kLength; ++col) {
    const bool at_edge = col < kEdge || col >= kLength - kEdge;
    projection[col] = at_edge ? 2 : 10;
  }
  EXPECT_FALSE(finder_->GapInXProjection(projection, kLength));
}
154 
// High values (10) in the first ten and last ten entries with low values (2)
// in the 80 entries between them are expected to be reported as a gap.
TEST_F(TableFinderTest, GapInXProjectionExists) {
  constexpr int kLength = 100;
  constexpr int kEdge = 10;
  int projection[kLength];
  for (int col = 0; col < kLength; ++col) {
    const bool in_middle = col >= kEdge && col < kLength - kEdge;
    projection[col] = in_middle ? 2 : 10;
  }
  EXPECT_TRUE(finder_->GapInXProjection(projection, kLength));
}
168 
// Queries HasLeaderAdjacent for several scratch partitions against a single
// leader partition inserted at (90, 0)-(150, 5).
TEST_F(TableFinderTest, HasLeaderAdjacentOverlapping) {
  // Rebuilds the scratch partition with the given coordinates and returns
  // what the finder reports for it.
  const auto leader_adjacent_to = [this](int x_min, int y_min, int x_max, int y_max) {
    MakePartition(x_min, y_min, x_max, y_max);
    return finder_->HasLeaderAdjacent(*partition_);
  };

  InsertLeaderPartition(90, 0, 150, 5);
  EXPECT_TRUE(leader_adjacent_to(0, 0, 100, 10));
  EXPECT_FALSE(leader_adjacent_to(0, 25, 100, 40));
  EXPECT_TRUE(leader_adjacent_to(145, 0, 200, 20));
  EXPECT_TRUE(leader_adjacent_to(40, 0, 50, 4));
}
180 
// Queries HasLeaderAdjacent for scratch partitions whose x-ranges stop short
// of, or whose y-ranges differ from, a leader inserted at (90, 10)-(150, 15).
TEST_F(TableFinderTest, HasLeaderAdjacentNoOverlap) {
  // Rebuilds the scratch partition with the given coordinates and returns
  // what the finder reports for it.
  const auto leader_adjacent_to = [this](int x_min, int y_min, int x_max, int y_max) {
    MakePartition(x_min, y_min, x_max, y_max);
    return finder_->HasLeaderAdjacent(*partition_);
  };

  InsertLeaderPartition(90, 10, 150, 15);
  EXPECT_TRUE(leader_adjacent_to(0, 10, 85, 20));
  EXPECT_FALSE(leader_adjacent_to(0, 25, 100, 40));
  EXPECT_FALSE(leader_adjacent_to(0, 0, 100, 10));
  // TODO(nbeato): is this a useful metric? case fails
  // MakePartition(160, 0, 200, 15);  // leader is primarily above it
  // EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
}
193 
// Queries HasLeaderAdjacent for scratch partitions with various column spans
// against a leader at (90, 0)-(150, 5) that is assigned columns 1 through 2.
TEST_F(TableFinderTest, HasLeaderAdjacentPreservesColumns) {
  // Rebuilds the scratch partition with the given coordinates and column
  // span, and returns what the finder reports for it.
  const auto leader_adjacent_to = [this](int x_min, int y_min, int x_max, int y_max,
                                         int first_column, int last_column) {
    MakePartition(x_min, y_min, x_max, y_max, first_column, last_column);
    return finder_->HasLeaderAdjacent(*partition_);
  };

  InsertLeaderPartition(90, 0, 150, 5, 1, 2);
  EXPECT_FALSE(leader_adjacent_to(0, 0, 85, 10, 0, 0));
  EXPECT_TRUE(leader_adjacent_to(0, 0, 100, 10, 0, 1));
  EXPECT_TRUE(leader_adjacent_to(0, 0, 200, 10, 0, 5));
  EXPECT_FALSE(leader_adjacent_to(155, 0, 200, 10, 5, 5));
}
205 
206 // TODO(nbeato): Only testing a splitting case. Add more...
207 // Also test non-split cases.
// Builds one text partition from three runs of blobs separated by wide
// horizontal spaces and expects SplitAndInsertFragmentedTextPartition to
// produce exactly three partitions, one per run.
TEST_F(TableFinderTest, SplitAndInsertFragmentedPartitionsBasicPass) {
  finder_->set_global_median_blob_width(3);
  finder_->set_global_median_xheight(10);

  auto *all = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
  all->set_type(PT_FLOWING_TEXT);
  all->set_blob_type(BRT_TEXT);
  all->set_flow(BTFT_CHAIN);
  all->set_left_margin(10);
  all->set_right_margin(100);

  // Every blob keeps the vertical extent 5..15; only left/right change.
  TBOX blob_box(10, 5, 100, 15);
  // Adds one blob spanning [x + 1, x + 4] for x = first, first + 5, ..., last.
  const auto add_blob_run = [&](int first, int last) {
    for (int x = first; x <= last; x += 5) {
      blob_box.set_left(x + 1);
      blob_box.set_right(x + 4);
      all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
    }
  };
  add_blob_run(10, 20); // blobs covering x = 11..24
  add_blob_run(35, 55); // blobs covering x = 36..59
  add_blob_run(80, 95); // blobs covering x = 81..99

  // TODO(nbeato): Ray's newer code...
  // all->ClaimBoxes();
  all->ComputeLimits(); // This is to make sure median info is set.
  // Hands |all| to the finder and to the fixture's tracking list, whose
  // partitions have their boxes deleted in TearDown().
  InsertTextPartition(all);
  ColPartition *fragment_me = all->CopyButDontOwnBlobs();

  finder_->SplitAndInsertFragmentedTextPartition(fragment_me);
  finder_->ExpectPartition(TBOX(11, 5, 24, 15));
  finder_->ExpectPartition(TBOX(36, 5, 59, 15));
  finder_->ExpectPartition(TBOX(81, 5, 99, 15));
  finder_->ExpectPartitionCount(3);
}
247 
// Builds one text partition from a single evenly spaced run of blobs and
// expects SplitAndInsertFragmentedTextPartition to leave it as one partition.
TEST_F(TableFinderTest, SplitAndInsertFragmentedPartitionsBasicFail) {
  finder_->set_global_median_blob_width(3);
  finder_->set_global_median_xheight(10);

  auto *all = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
  all->set_type(PT_FLOWING_TEXT);
  all->set_blob_type(BRT_TEXT);
  all->set_flow(BTFT_CHAIN);
  all->set_left_margin(10);
  all->set_right_margin(100);

  // Every blob keeps the vertical extent 5..15; only left/right change.
  // One blob spanning [x + 1, x + 4] for x = 10, 15, ..., 95 (x = 11..99).
  TBOX blob_box(10, 5, 100, 15);
  constexpr int kFirstX = 10;
  constexpr int kLastX = 95;
  constexpr int kStep = 5;
  for (int x = kFirstX; x <= kLastX; x += kStep) {
    blob_box.set_left(x + 1);
    blob_box.set_right(x + 4);
    all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
  }

  // TODO(nbeato): Ray's newer code...
  // all->ClaimBoxes();
  all->ComputeLimits(); // This is to make sure median info is set.
  // Hands |all| to the finder and to the fixture's tracking list, whose
  // partitions have their boxes deleted in TearDown().
  InsertTextPartition(all);
  ColPartition *fragment_me = all->CopyButDontOwnBlobs();

  finder_->SplitAndInsertFragmentedTextPartition(fragment_me);
  finder_->ExpectPartition(TBOX(11, 5, 99, 15));
  finder_->ExpectPartitionCount(1);
}
275 
276 } // namespace tesseract
277