1 ///////////////////////////////////////////////////////////////////////
2 // File:        ccnontextdetect.cpp
3 // Description: Connected-Component-based photo (non-text) detection.
4 // Author:      rays@google.com (Ray Smith)
5 //
6 // Copyright 2011 Google Inc. All Rights Reserved.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18 
19 #ifdef HAVE_CONFIG_H
20 #  include "config_auto.h"
21 #endif
22 
23 #include "ccnontextdetect.h"
24 #include "helpers.h"         // for IntCastRounded
25 #include "imagefind.h"
26 #include "strokewidth.h"
27 
28 namespace tesseract {
29 
// Upper limit on the number of neighbouring small objects per squared
// gridsize; above it a grid cell becomes image.
constexpr double kMaxSmallNeighboursPerPix = 1.0 / 32;
// Upper limit on the number of small blobs that a large blob may overlap;
// above it the large blob is rejected and determined to be image.
constexpr int kMaxLargeOverlapsWithSmall = 3;
// Upper limit on the number of small blobs that a medium blob may overlap;
// above it the medium blob is rejected and determined to be image. The limit
// is larger than for large blobs because medium blobs may be complex Chinese
// characters. Very large Chinese characters are going to overlap more medium
// blobs than small.
constexpr int kMaxMediumOverlapsWithSmall = 12;
// Upper limit on the number of normal blobs that a large blob may overlap;
// above it the large blob is rejected and determined to be image. Set higher
// to allow for drop caps, which may overlap a lot of good text blobs.
constexpr int kMaxLargeOverlapsWithMedium = 12;
// The original (un-summed) noise count of a cell is multiplied by this value
// when testing for noise that has spread beyond where it should really be.
constexpr int kOriginalNoiseMultiple = 8;
// Padding, in pixels, applied to noise blobs when they are rendered on the
// image mask, to encourage them to join together. If it is made too big,
// images fatten out too much and have to be clipped to text.
constexpr int kNoisePadding = 4;
// Fraction of max_noise_count_ that is added to the noise count when there is
// photo mask in the background.
constexpr double kPhotoOffsetFraction = 0.375;
// Minimum perimeter^2/16area ratio of a "good" blob for the purpose of
// estimating noise density. Good blobs are supposed to be highly likely real
// text. A square is considered to have unit ratio, where A=(p/4)^2, hence the
// factor of 16. Digital circles are weird and have a minimum ratio of pi/64,
// not the 1/(4pi) that you would expect.
constexpr double kMinGoodTextPARatio = 1.5;
61 
CCNonTextDetect(int gridsize,const ICOORD & bleft,const ICOORD & tright)62 CCNonTextDetect::CCNonTextDetect(int gridsize, const ICOORD &bleft, const ICOORD &tright)
63     : BlobGrid(gridsize, bleft, tright)
64     , max_noise_count_(static_cast<int>(kMaxSmallNeighboursPerPix * gridsize * gridsize))
65     , noise_density_(nullptr) {
66   // TODO(rays) break max_noise_count_ out into an area-proportional
67   // value, as now plus an additive constant for the number of text blobs
68   // in the 3x3 neighbourhood - maybe 9.
69 }
70 
~CCNonTextDetect()71 CCNonTextDetect::~CCNonTextDetect() {
72   delete noise_density_;
73 }
74 
75 // Creates and returns a Pix with the same resolution as the original
76 // in which 1 (black) pixels represent likely non text (photo, line drawing)
77 // areas of the page, deleting from the blob_block the blobs that were
78 // determined to be non-text.
79 // The photo_map is used to bias the decision towards non-text, rather than
80 // supplying definite decision.
81 // The blob_block is the usual result of connected component analysis,
82 // holding the detected blobs.
83 // The returned Pix should be PixDestroyed after use.
ComputeNonTextMask(bool debug,Image photo_map,TO_BLOCK * blob_block)84 Image CCNonTextDetect::ComputeNonTextMask(bool debug, Image photo_map, TO_BLOCK *blob_block) {
85   // Insert the smallest blobs into the grid.
86   InsertBlobList(&blob_block->small_blobs);
87   InsertBlobList(&blob_block->noise_blobs);
88   // Add the medium blobs that don't have a good strokewidth neighbour.
89   // Those that do go into good_grid as an antidote to spreading beyond the
90   // real reaches of a noise region.
91   BlobGrid good_grid(gridsize(), bleft(), tright());
92   BLOBNBOX_IT blob_it(&blob_block->blobs);
93   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
94     BLOBNBOX *blob = blob_it.data();
95     double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0;
96     perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area();
97     if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio) {
98       InsertBBox(true, true, blob);
99     } else {
100       good_grid.InsertBBox(true, true, blob);
101     }
102   }
103   noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid);
104   good_grid.Clear(); // Not needed any more.
105   Image pix = noise_density_->ThresholdToPix(max_noise_count_);
106   if (debug) {
107     pixWrite("junknoisemask.png", pix, IFF_PNG);
108   }
109   ScrollView *win = nullptr;
110 #ifndef GRAPHICS_DISABLED
111   if (debug) {
112     win = MakeWindow(0, 400, "Photo Mask Blobs");
113   }
114 #endif // !GRAPHICS_DISABLED
115   // Large and medium blobs are not text if they overlap with "a lot" of small
116   // blobs.
117   MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, kMaxLargeOverlapsWithSmall, win,
118                             ScrollView::DARK_GREEN, pix);
119   MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall, win, ScrollView::WHITE,
120                             pix);
121   // Clear the grid of small blobs and insert the medium blobs.
122   Clear();
123   InsertBlobList(&blob_block->blobs);
124   MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, kMaxLargeOverlapsWithMedium, win,
125                             ScrollView::DARK_GREEN, pix);
126   // Clear again before we start deleting the blobs in the grid.
127   Clear();
128   MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1, win, ScrollView::CORAL, pix);
129   MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1, win, ScrollView::GOLDENROD, pix);
130   MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1, win, ScrollView::WHITE, pix);
131   if (debug) {
132 #ifndef GRAPHICS_DISABLED
133     win->Update();
134 #endif // !GRAPHICS_DISABLED
135     pixWrite("junkccphotomask.png", pix, IFF_PNG);
136 #ifndef GRAPHICS_DISABLED
137     delete win->AwaitEvent(SVET_DESTROY);
138     delete win;
139 #endif // !GRAPHICS_DISABLED
140   }
141   return pix;
142 }
143 
144 // Computes and returns the noise_density IntGrid, at the same gridsize as
145 // this by summing the number of small elements in a 3x3 neighbourhood of
146 // each grid cell. good_grid is filled with blobs that are considered most
147 // likely good text, and this is filled with small and medium blobs that are
148 // more likely non-text.
149 // The photo_map is used to bias the decision towards non-text, rather than
150 // supplying definite decision.
ComputeNoiseDensity(bool debug,Image photo_map,BlobGrid * good_grid)151 IntGrid *CCNonTextDetect::ComputeNoiseDensity(bool debug, Image photo_map, BlobGrid *good_grid) {
152   IntGrid *noise_counts = CountCellElements();
153   IntGrid *noise_density = noise_counts->NeighbourhoodSum();
154   IntGrid *good_counts = good_grid->CountCellElements();
155   // Now increase noise density in photo areas, to bias the decision and
156   // minimize hallucinated text on image, but trim the noise_density where
157   // there are good blobs and the original count is low in non-photo areas,
158   // indicating that most of the result came from neighbouring cells.
159   int height = pixGetHeight(photo_map);
160   int photo_offset = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction);
161   for (int y = 0; y < gridheight(); ++y) {
162     for (int x = 0; x < gridwidth(); ++x) {
163       int noise = noise_density->GridCellValue(x, y);
164       if (max_noise_count_ < noise + photo_offset && noise <= max_noise_count_) {
165         // Test for photo.
166         int left = x * gridsize();
167         int right = left + gridsize();
168         int bottom = height - y * gridsize();
169         int top = bottom - gridsize();
170         if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right, &bottom)) {
171           noise_density->SetGridCell(x, y, noise + photo_offset);
172         }
173       }
174       if (debug && noise > max_noise_count_ && good_counts->GridCellValue(x, y) > 0) {
175         tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n", x * gridsize(), y * gridsize(),
176                 noise_density->GridCellValue(x, y), good_counts->GridCellValue(x, y),
177                 noise_counts->GridCellValue(x, y), max_noise_count_);
178       }
179       if (noise > max_noise_count_ && good_counts->GridCellValue(x, y) > 0 &&
180           noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <= max_noise_count_) {
181         noise_density->SetGridCell(x, y, 0);
182       }
183     }
184   }
185   delete noise_counts;
186   delete good_counts;
187   return noise_density;
188 }
189 
190 // Helper to expand a box in one of the 4 directions by the given pad,
191 // provided it does not expand into any cell with a zero noise density.
192 // If that is not possible, try expanding all round by a small constant.
AttemptBoxExpansion(const TBOX & box,const IntGrid & noise_density,int pad)193 static TBOX AttemptBoxExpansion(const TBOX &box, const IntGrid &noise_density, int pad) {
194   TBOX expanded_box(box);
195   expanded_box.set_right(box.right() + pad);
196   if (!noise_density.AnyZeroInRect(expanded_box)) {
197     return expanded_box;
198   }
199   expanded_box = box;
200   expanded_box.set_left(box.left() - pad);
201   if (!noise_density.AnyZeroInRect(expanded_box)) {
202     return expanded_box;
203   }
204   expanded_box = box;
205   expanded_box.set_top(box.top() + pad);
206   if (!noise_density.AnyZeroInRect(expanded_box)) {
207     return expanded_box;
208   }
209   expanded_box = box;
210   expanded_box.set_bottom(box.bottom() + pad);
211   if (!noise_density.AnyZeroInRect(expanded_box)) {
212     return expanded_box;
213   }
214   expanded_box = box;
215   expanded_box.pad(kNoisePadding, kNoisePadding);
216   if (!noise_density.AnyZeroInRect(expanded_box)) {
217     return expanded_box;
218   }
219   return box;
220 }
221 
222 // Tests each blob in the list to see if it is certain non-text using 2
223 // conditions:
224 // 1. blob overlaps a cell with high value in noise_density_ (previously set
225 // by ComputeNoiseDensity).
226 // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
227 // condition is disabled with max_blob_overlaps == -1.
228 // If it does, the blob is declared non-text, and is used to mark up the
229 // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
230 // neighbours reset, as they may now point to deleted data.
231 // WARNING: The blobs list blobs may be in the *this grid, but they are
232 // not removed. If any deleted blobs might be in *this, then this must be
233 // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
234 // If the win is not nullptr, deleted blobs are drawn on it in red, and kept
235 // blobs are drawn on it in ok_color.
MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST * blobs,int max_blob_overlaps,ScrollView * win,ScrollView::Color ok_color,Image nontext_mask)236 void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST *blobs, int max_blob_overlaps,
237                                                 ScrollView *win, ScrollView::Color ok_color,
238                                                 Image nontext_mask) {
239   int imageheight = tright().y() - bleft().x();
240   BLOBNBOX_IT blob_it(blobs);
241   BLOBNBOX_LIST dead_blobs;
242   BLOBNBOX_IT dead_it(&dead_blobs);
243   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
244     BLOBNBOX *blob = blob_it.data();
245     TBOX box = blob->bounding_box();
246     if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) &&
247         (max_blob_overlaps < 0 || !BlobOverlapsTooMuch(blob, max_blob_overlaps))) {
248       blob->ClearNeighbours();
249 #ifndef GRAPHICS_DISABLED
250       if (win != nullptr) {
251         blob->plot(win, ok_color, ok_color);
252       }
253 #endif // !GRAPHICS_DISABLED
254     } else {
255       if (noise_density_->AnyZeroInRect(box)) {
256         // There is a danger that the bounding box may overlap real text, so
257         // we need to render the outline.
258         Image blob_pix = blob->cblob()->render_outline();
259         pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(),
260                     PIX_SRC | PIX_DST, blob_pix, 0, 0);
261         blob_pix.destroy();
262       } else {
263         if (box.area() < gridsize() * gridsize()) {
264           // It is a really bad idea to make lots of small components in the
265           // photo mask, so try to join it to a bigger area by expanding the
266           // box in a way that does not touch any zero noise density cell.
267           box = AttemptBoxExpansion(box, *noise_density_, gridsize());
268         }
269         // All overlapped cells are non-zero, so just mark the rectangle.
270         pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(),
271                     PIX_SET, nullptr, 0, 0);
272       }
273 #ifndef GRAPHICS_DISABLED
274       if (win != nullptr) {
275         blob->plot(win, ScrollView::RED, ScrollView::RED);
276       }
277 #endif // !GRAPHICS_DISABLED
278       // It is safe to delete the cblob now, as it isn't used by the grid
279       // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the
280       // dead_blobs list.
281       // TODO: remove next line, currently still needed for resultiterator_test.
282       delete blob->remove_cblob();
283       dead_it.add_to_end(blob_it.extract());
284     }
285   }
286 }
287 
288 // Returns true if the given blob overlaps more than max_overlaps blobs
289 // in the current grid.
BlobOverlapsTooMuch(BLOBNBOX * blob,int max_overlaps)290 bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX *blob, int max_overlaps) {
291   // Search the grid to see what intersects it.
292   // Setup a Rectangle search for overlapping this blob.
293   BlobGridSearch rsearch(this);
294   const TBOX &box = blob->bounding_box();
295   rsearch.StartRectSearch(box);
296   rsearch.SetUniqueMode(true);
297   BLOBNBOX *neighbour;
298   int overlap_count = 0;
299   while (overlap_count <= max_overlaps && (neighbour = rsearch.NextRectSearch()) != nullptr) {
300     if (box.major_overlap(neighbour->bounding_box())) {
301       ++overlap_count;
302       if (overlap_count > max_overlaps) {
303         return true;
304       }
305     }
306   }
307   return false;
308 }
309 
310 } // namespace tesseract.
311