1 /*
2     Scan Tailor - Interactive post-processing tool for scanned pages.
3     Copyright (C)  Joseph Artsimovich <joseph.artsimovich@gmail.com>
4 
5     This program is free software: you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation, either version 3 of the License, or
8     (at your option) any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "Despeckle.h"
20 #include <QDebug>
21 #include <QImage>
22 #include <cmath>
23 #include <unordered_map>
24 #include "DebugImages.h"
25 #include "Dpi.h"
26 #include "FastQueue.h"
27 #include "TaskStatus.h"
28 #include "imageproc/BinaryImage.h"
29 #include "imageproc/ConnectivityMap.h"
30 
31 /**
32  * \file
33  * The idea of this despeckling algorithm is as follows:
34  * \li The connected components that are larger than the specified threshold
35  * are marked as non-garbage.
36  * \li If a connected component is close enough to a non-garbage component
37  * and their sizes are comparable or the non-garbage one is larger, then
38  * the other one is also marked as non-garbage.
39  *
40  * The last step may be repeated until no new components are marked.
41  * as non-garbage.
42  */
43 
44 using namespace imageproc;
45 
46 namespace {
47 /**
48  * We treat vertical distances differently from the horizontal ones.
49  * We want horizontal proximity to have greater weight, so we
50  * multiply the vertical component distances by VERTICAL_SCALE,
51  * so that the distance is not:\n
52  * std::sqrt(dx^2 + dy^2)\n
53  * but:\n
54  * std::sqrt(dx^2 + (VERTICAL_SCALE*dy)^2)\n
55  * Keep in mind that we actually operate on squared distances,
56  * so we don't need to take that square root.
57  */
58 const int VERTICAL_SCALE = 2;
59 const int VERTICAL_SCALE_SQ = VERTICAL_SCALE * VERTICAL_SCALE;
60 
61 struct Settings {
62   /**
63    * When multiplied by the number of pixels in a connected component,
64    * gives the minimum size (in terms of the number of pixels) of a connected
65    * component we may attach it to.
66    */
67   double minRelativeParentWeight;
68 
69   /**
70    * When multiplied by the number of pixels in a connected component,
71    * gives the maximum squared distance to another connected component
72    * we may attach it to.
73    */
74   uint32_t pixelsToSqDist;
75 
76   /**
77    * Defines the minimum width or height in pixels that will guarantee
78    * the object won't be removed.
79    */
80   int bigObjectThreshold;
81 
82   static Settings get(Despeckle::Level level, const Dpi& dpi);
83 
84   static Settings get(double level, const Dpi& dpi);
85 };
86 
get(const Despeckle::Level level,const Dpi & dpi)87 Settings Settings::get(const Despeckle::Level level, const Dpi& dpi) {
88   Settings settings{};
89 
90   const int min_dpi = std::min(dpi.horizontal(), dpi.vertical());
91   const double dpi_factor = min_dpi / 300.0;
92   // To silence compiler's warnings.
93   settings.minRelativeParentWeight = 0;
94   settings.pixelsToSqDist = 0;
95   settings.bigObjectThreshold = 0;
96 
97   switch (level) {
98     case Despeckle::CAUTIOUS:
99       settings.minRelativeParentWeight = 0.125 * dpi_factor;
100       settings.pixelsToSqDist = static_cast<uint32_t>(std::pow(10.0, 2));
101       settings.bigObjectThreshold = qRound(7 * dpi_factor);
102       break;
103     case Despeckle::NORMAL:
104       settings.minRelativeParentWeight = 0.175 * dpi_factor;
105       settings.pixelsToSqDist = static_cast<uint32_t>(std::pow(6.5, 2));
106       settings.bigObjectThreshold = qRound(12 * dpi_factor);
107       break;
108     case Despeckle::AGGRESSIVE:
109       settings.minRelativeParentWeight = 0.225 * dpi_factor;
110       settings.pixelsToSqDist = static_cast<uint32_t>(std::pow(3.5, 2));
111       settings.bigObjectThreshold = qRound(17 * dpi_factor);
112       break;
113   }
114 
115   return settings;
116 }
117 
get(const double level,const Dpi & dpi)118 Settings Settings::get(const double level, const Dpi& dpi) {
119   Settings settings{};
120 
121   const int min_dpi = std::min(dpi.horizontal(), dpi.vertical());
122   const double dpi_factor = min_dpi / 300.0;
123 
124   settings.minRelativeParentWeight = (0.05 * level + 0.075) * dpi_factor;
125   settings.pixelsToSqDist = static_cast<uint32_t>(std::pow(0.25 * std::pow(level, 2) - 4.25 * level + 14, 2));
126   settings.bigObjectThreshold = qRound((5 * level + 2) * dpi_factor);
127 
128   return settings;
129 }
130 
131 struct Component {
132   static const uint32_t ANCHORED_TO_BIG = uint32_t(1) << 31;
133   static const uint32_t ANCHORED_TO_SMALL = uint32_t(1) << 30;
134   static const uint32_t TAG_MASK = ANCHORED_TO_BIG | ANCHORED_TO_SMALL;
135 
136   /**
137    * Lower 30 bits: the number of pixels in the connected component.
138    * Higher 2 bits: tags.
139    */
140   uint32_t num_pixels;
141 
Component__anonb0e8a5fd0111::Component142   Component() : num_pixels(0) {}
143 
anchoredToBig__anonb0e8a5fd0111::Component144   const uint32_t anchoredToBig() const { return num_pixels & ANCHORED_TO_BIG; }
145 
setAnchoredToBig__anonb0e8a5fd0111::Component146   void setAnchoredToBig() { num_pixels |= ANCHORED_TO_BIG; }
147 
anchoredToSmall__anonb0e8a5fd0111::Component148   const uint32_t anchoredToSmall() const { return num_pixels & ANCHORED_TO_SMALL; }
149 
setAnchoredToSmall__anonb0e8a5fd0111::Component150   void setAnchoredToSmall() { num_pixels |= ANCHORED_TO_SMALL; }
151 
anchoredToSmallButNotBig__anonb0e8a5fd0111::Component152   const bool anchoredToSmallButNotBig() const { return (num_pixels & TAG_MASK) == ANCHORED_TO_SMALL; }
153 
clearTags__anonb0e8a5fd0111::Component154   void clearTags() { num_pixels &= ~TAG_MASK; }
155 };
156 
157 const uint32_t Component::ANCHORED_TO_BIG;
158 const uint32_t Component::ANCHORED_TO_SMALL;
159 const uint32_t Component::TAG_MASK;
160 
161 struct BoundingBox {
162   int top;
163   int left;
164   int bottom;
165   int right;
166 
BoundingBox__anonb0e8a5fd0111::BoundingBox167   BoundingBox() {
168     top = left = std::numeric_limits<int>::max();
169     bottom = right = std::numeric_limits<int>::min();
170   }
171 
width__anonb0e8a5fd0111::BoundingBox172   int width() const { return right - left + 1; }
173 
height__anonb0e8a5fd0111::BoundingBox174   int height() const { return bottom - top + 1; }
175 
extend__anonb0e8a5fd0111::BoundingBox176   void extend(int x, int y) {
177     top = std::min(top, y);
178     left = std::min(left, x);
179     bottom = std::max(bottom, y);
180     right = std::max(right, x);
181   }
182 };
183 
184 struct Vector {
185   int16_t x;
186   int16_t y;
187 };
188 
189 union Distance {
190   Vector vec;
191   uint32_t raw;
192 
zero()193   static Distance zero() {
194     Distance dist{};
195     dist.raw = 0;
196 
197     return dist;
198   }
199 
special()200   static Distance special() {
201     Distance dist{};
202     dist.vec.x = dist.vec.y = std::numeric_limits<int16_t>::max();
203 
204     return dist;
205   }
206 
operator ==(const Distance & other) const207   bool operator==(const Distance& other) const { return raw == other.raw; }
208 
operator !=(const Distance & other) const209   bool operator!=(const Distance& other) const { return raw != other.raw; }
210 
reset(int x)211   void reset(int x) {
212     vec.x = static_cast<int16_t>(std::numeric_limits<int16_t>::max() - x);
213     vec.y = 0;
214   }
215 
sqdist() const216   uint32_t sqdist() const {
217     const int x = vec.x;
218     const int y = vec.y;
219 
220     return static_cast<uint32_t>(x * x + VERTICAL_SCALE_SQ * y * y);
221   }
222 };
223 
224 /**
225  * \brief A bidirectional association between two connected components.
226  */
227 struct Connection {
228   uint32_t lesser_label;
229   uint32_t greater_label;
230 
Connection__anonb0e8a5fd0111::Connection231   Connection(uint32_t lbl1, uint32_t lbl2) {
232     if (lbl1 < lbl2) {
233       lesser_label = lbl1;
234       greater_label = lbl2;
235     } else {
236       lesser_label = lbl2;
237       greater_label = lbl1;
238     }
239   }
240 
operator <__anonb0e8a5fd0111::Connection241   bool operator<(const Connection& rhs) const {
242     if (lesser_label < rhs.lesser_label) {
243       return true;
244     } else if (lesser_label > rhs.lesser_label) {
245       return false;
246     } else {
247       return greater_label < rhs.greater_label;
248     }
249   }
250 
operator ==__anonb0e8a5fd0111::Connection251   bool operator==(const Connection& other) const {
252     return (lesser_label == other.lesser_label) && (greater_label == other.greater_label);
253   }
254 
255   struct hash {
operator ()__anonb0e8a5fd0111::Connection::hash256     std::size_t operator()(const Connection& connection) const noexcept {
257       return std::hash<int>()(connection.lesser_label) ^ std::hash<int>()(connection.greater_label << 1);
258     }
259   };
260 };
261 
262 /**
263  * \brief A directional assiciation between two connected components.
264  */
265 struct TargetSourceConn {
266   uint32_t target;
267 
268   /**< The label of the target connected component. */
269   uint32_t source;
270 
271   /**< The label of the source connected component. */
272 
TargetSourceConn__anonb0e8a5fd0111::TargetSourceConn273   TargetSourceConn(uint32_t tgt, uint32_t src) : target(tgt), source(src) {}
274 
275   /**
276    * The ordering is by target then source.  It's designed to be able
277    * to quickly locate all associations involving a specific target.
278    */
operator <__anonb0e8a5fd0111::TargetSourceConn279   bool operator<(const TargetSourceConn& rhs) const {
280     if (target < rhs.target) {
281       return true;
282     } else if (target > rhs.target) {
283       return false;
284     } else {
285       return source < rhs.source;
286     }
287   }
288 };
289 
290 /**
291  * \brief If the association didn't exist, create it,
292  *        otherwise the minimum distance.
293  */
updateDistance(std::unordered_map<Connection,uint32_t,Connection::hash> & conns,uint32_t label1,uint32_t label2,uint32_t sqdist)294 void updateDistance(std::unordered_map<Connection, uint32_t, Connection::hash>& conns,
295                     uint32_t label1,
296                     uint32_t label2,
297                     uint32_t sqdist) {
298   typedef std::unordered_map<Connection, uint32_t, Connection::hash> Connections;
299 
300   const Connection conn(label1, label2);
301   auto it(conns.find(conn));
302   if (it == conns.end()) {
303     conns.insert(Connections::value_type(conn, sqdist));
304   } else if (sqdist < it->second) {
305     it->second = sqdist;
306   }
307 }
308 
309 /**
310  * \brief Tag the source component with ANCHORED_TO_SMALL, ANCHORED_TO_BIG
311  *        or none of the above.
312  */
tagSourceComponent(Component & source,const Component & target,uint32_t sqdist,const Settings & settings)313 void tagSourceComponent(Component& source, const Component& target, uint32_t sqdist, const Settings& settings) {
314   if (source.anchoredToBig()) {
315     // No point in setting ANCHORED_TO_SMALL.
316     return;
317   }
318 
319   if (sqdist > source.num_pixels * settings.pixelsToSqDist) {
320     // Too far.
321     return;
322   }
323 
324   if (target.num_pixels >= settings.minRelativeParentWeight * source.num_pixels) {
325     source.setAnchoredToBig();
326   } else {
327     source.setAnchoredToSmall();
328   }
329 }
330 
331 /**
332  * Check if the component may be attached to another one.
333  * Attaching a component to another one will preserve the component
334  * being attached, provided that the one it's attached to is also preserved.
335  */
canBeAttachedTo(const Component & comp,const Component & target,uint32_t sqdist,const Settings & settings)336 bool canBeAttachedTo(const Component& comp, const Component& target, uint32_t sqdist, const Settings& settings) {
337   if (sqdist <= comp.num_pixels * settings.pixelsToSqDist) {
338     if (target.num_pixels >= comp.num_pixels * settings.minRelativeParentWeight) {
339       return true;
340     }
341   }
342 
343   return false;
344 }
345 
voronoi(ConnectivityMap & cmap,std::vector<Distance> & dist)346 void voronoi(ConnectivityMap& cmap, std::vector<Distance>& dist) {
347   const int width = cmap.size().width() + 2;
348   const int height = cmap.size().height() + 2;
349 
350   assert(dist.empty());
351   dist.resize(width * height, Distance::zero());
352 
353   std::vector<uint32_t> sqdists(width * 2, 0);
354   uint32_t* prev_sqdist_line = &sqdists[0];
355   uint32_t* this_sqdist_line = &sqdists[width];
356 
357   Distance* dist_line = &dist[0];
358   uint32_t* cmap_line = cmap.paddedData();
359 
360   dist_line[0].reset(0);
361   prev_sqdist_line[0] = dist_line[0].sqdist();
362   for (int x = 1; x < width; ++x) {
363     dist_line[x].vec.x = static_cast<int16_t>(dist_line[x - 1].vec.x - 1);
364     prev_sqdist_line[x] = prev_sqdist_line[x - 1] - (int(dist_line[x - 1].vec.x) << 1) + 1;
365   }
366 
367   // Top to bottom scan.
368   for (int y = 1; y < height; ++y) {
369     dist_line += width;
370     cmap_line += width;
371     dist_line[0].reset(0);
372     dist_line[width - 1].reset(width - 1);
373     this_sqdist_line[0] = dist_line[0].sqdist();
374     this_sqdist_line[width - 1] = dist_line[width - 1].sqdist();
375     // Left to right scan.
376     for (int x = 1; x < width - 1; ++x) {
377       if (cmap_line[x]) {
378         this_sqdist_line[x] = 0;
379         assert(dist_line[x] == Distance::zero());
380         continue;
381       }
382 
383       // Propagate from left.
384       Distance left_dist = dist_line[x - 1];
385       uint32_t sqdist_left = this_sqdist_line[x - 1];
386       sqdist_left += 1 - (int(left_dist.vec.x) << 1);
387       // Propagate from top.
388       Distance top_dist = dist_line[x - width];
389       uint32_t sqdist_top = prev_sqdist_line[x];
390       sqdist_top += VERTICAL_SCALE_SQ - 2 * VERTICAL_SCALE_SQ * int(top_dist.vec.y);
391 
392       if (sqdist_left < sqdist_top) {
393         this_sqdist_line[x] = sqdist_left;
394         --left_dist.vec.x;
395         dist_line[x] = left_dist;
396         cmap_line[x] = cmap_line[x - 1];
397       } else {
398         this_sqdist_line[x] = sqdist_top;
399         --top_dist.vec.y;
400         dist_line[x] = top_dist;
401         cmap_line[x] = cmap_line[x - width];
402       }
403     }
404 
405     // Right to left scan.
406     for (int x = width - 2; x >= 1; --x) {
407       // Propagate from right.
408       Distance right_dist = dist_line[x + 1];
409       uint32_t sqdist_right = this_sqdist_line[x + 1];
410       sqdist_right += 1 + (int(right_dist.vec.x) << 1);
411 
412       if (sqdist_right < this_sqdist_line[x]) {
413         this_sqdist_line[x] = sqdist_right;
414         ++right_dist.vec.x;
415         dist_line[x] = right_dist;
416         cmap_line[x] = cmap_line[x + 1];
417       }
418     }
419 
420     std::swap(this_sqdist_line, prev_sqdist_line);
421   }
422 
423   // Bottom to top scan.
424   for (int y = height - 2; y >= 1; --y) {
425     dist_line -= width;
426     cmap_line -= width;
427     dist_line[0].reset(0);
428     dist_line[width - 1].reset(width - 1);
429     this_sqdist_line[0] = dist_line[0].sqdist();
430     this_sqdist_line[width - 1] = dist_line[width - 1].sqdist();
431     // Right to left scan.
432     for (int x = width - 2; x >= 1; --x) {
433       // Propagate from right.
434       Distance right_dist = dist_line[x + 1];
435       uint32_t sqdist_right = this_sqdist_line[x + 1];
436       sqdist_right += 1 + (int(right_dist.vec.x) << 1);
437       // Propagate from bottom.
438       Distance bottom_dist = dist_line[x + width];
439       uint32_t sqdist_bottom = prev_sqdist_line[x];
440       sqdist_bottom += VERTICAL_SCALE_SQ + 2 * VERTICAL_SCALE_SQ * int(bottom_dist.vec.y);
441 
442       this_sqdist_line[x] = dist_line[x].sqdist();
443 
444       if (sqdist_right < this_sqdist_line[x]) {
445         this_sqdist_line[x] = sqdist_right;
446         ++right_dist.vec.x;
447         dist_line[x] = right_dist;
448         assert(cmap_line[x] == 0 || cmap_line[x + 1] != 0);
449         cmap_line[x] = cmap_line[x + 1];
450       }
451       if (sqdist_bottom < this_sqdist_line[x]) {
452         this_sqdist_line[x] = sqdist_bottom;
453         ++bottom_dist.vec.y;
454         dist_line[x] = bottom_dist;
455         assert(cmap_line[x] == 0 || cmap_line[x + width] != 0);
456         cmap_line[x] = cmap_line[x + width];
457       }
458     }
459     // Left to right scan.
460     for (int x = 1; x < width - 1; ++x) {
461       // Propagate from left.
462       Distance left_dist = dist_line[x - 1];
463       uint32_t sqdist_left = this_sqdist_line[x - 1];
464       sqdist_left += 1 - (int(left_dist.vec.x) << 1);
465 
466       if (sqdist_left < this_sqdist_line[x]) {
467         this_sqdist_line[x] = sqdist_left;
468         --left_dist.vec.x;
469         dist_line[x] = left_dist;
470         assert(cmap_line[x] == 0 || cmap_line[x - 1] != 0);
471         cmap_line[x] = cmap_line[x - 1];
472       }
473     }
474 
475     std::swap(this_sqdist_line, prev_sqdist_line);
476   }
477 }  // voronoi
478 
voronoiSpecial(ConnectivityMap & cmap,std::vector<Distance> & dist,const Distance special_distance)479 void voronoiSpecial(ConnectivityMap& cmap, std::vector<Distance>& dist, const Distance special_distance) {
480   const int width = cmap.size().width() + 2;
481   const int height = cmap.size().height() + 2;
482 
483   std::vector<uint32_t> sqdists(width * 2, 0);
484   uint32_t* prev_sqdist_line = &sqdists[0];
485   uint32_t* this_sqdist_line = &sqdists[width];
486 
487   Distance* dist_line = &dist[0];
488   uint32_t* cmap_line = cmap.paddedData();
489 
490   dist_line[0].reset(0);
491   prev_sqdist_line[0] = dist_line[0].sqdist();
492   for (int x = 1; x < width; ++x) {
493     dist_line[x].vec.x = static_cast<int16_t>(dist_line[x - 1].vec.x - 1);
494     prev_sqdist_line[x] = prev_sqdist_line[x - 1] - (int(dist_line[x - 1].vec.x) << 1) + 1;
495   }
496 
497   // Top to bottom scan.
498   for (int y = 1; y < height - 1; ++y) {
499     dist_line += width;
500     cmap_line += width;
501     dist_line[0].reset(0);
502     dist_line[width - 1].reset(width - 1);
503     this_sqdist_line[0] = dist_line[0].sqdist();
504     this_sqdist_line[width - 1] = dist_line[width - 1].sqdist();
505     // Left to right scan.
506     for (int x = 1; x < width - 1; ++x) {
507       if (dist_line[x] == special_distance) {
508         continue;
509       }
510 
511       this_sqdist_line[x] = dist_line[x].sqdist();
512       // Propagate from left.
513       Distance left_dist = dist_line[x - 1];
514       if (left_dist != special_distance) {
515         uint32_t sqdist_left = this_sqdist_line[x - 1];
516         sqdist_left += 1 - (int(left_dist.vec.x) << 1);
517         if (sqdist_left < this_sqdist_line[x]) {
518           this_sqdist_line[x] = sqdist_left;
519           --left_dist.vec.x;
520           dist_line[x] = left_dist;
521           assert(cmap_line[x] == 0 || cmap_line[x - 1] != 0);
522           cmap_line[x] = cmap_line[x - 1];
523         }
524       }
525       // Propagate from top.
526       Distance top_dist = dist_line[x - width];
527       if (top_dist != special_distance) {
528         uint32_t sqdist_top = prev_sqdist_line[x];
529         sqdist_top += VERTICAL_SCALE_SQ - 2 * VERTICAL_SCALE_SQ * int(top_dist.vec.y);
530         if (sqdist_top < this_sqdist_line[x]) {
531           this_sqdist_line[x] = sqdist_top;
532           --top_dist.vec.y;
533           dist_line[x] = top_dist;
534           assert(cmap_line[x] == 0 || cmap_line[x - width] != 0);
535           cmap_line[x] = cmap_line[x - width];
536         }
537       }
538     }
539 
540     // Right to left scan.
541     for (int x = width - 2; x >= 1; --x) {
542       if (dist_line[x] == special_distance) {
543         continue;
544       }
545       // Propagate from right.
546       Distance right_dist = dist_line[x + 1];
547       if (right_dist != special_distance) {
548         uint32_t sqdist_right = this_sqdist_line[x + 1];
549         sqdist_right += 1 + (int(right_dist.vec.x) << 1);
550         if (sqdist_right < this_sqdist_line[x]) {
551           this_sqdist_line[x] = sqdist_right;
552           ++right_dist.vec.x;
553           dist_line[x] = right_dist;
554           assert(cmap_line[x] == 0 || cmap_line[x + 1] != 0);
555           cmap_line[x] = cmap_line[x + 1];
556         }
557       }
558     }
559 
560     std::swap(this_sqdist_line, prev_sqdist_line);
561   }
562 
563   // Bottom to top scan.
564   for (int y = height - 2; y >= 1; --y) {
565     dist_line -= width;
566     cmap_line -= width;
567     dist_line[0].reset(0);
568     dist_line[width - 1].reset(width - 1);
569     this_sqdist_line[0] = dist_line[0].sqdist();
570     this_sqdist_line[width - 1] = dist_line[width - 1].sqdist();
571     // Right to left scan.
572     for (int x = width - 2; x >= 1; --x) {
573       if (dist_line[x] == special_distance) {
574         continue;
575       }
576 
577       this_sqdist_line[x] = dist_line[x].sqdist();
578       // Propagate from right.
579       Distance right_dist = dist_line[x + 1];
580       if (right_dist != special_distance) {
581         uint32_t sqdist_right = this_sqdist_line[x + 1];
582         sqdist_right += 1 + (int(right_dist.vec.x) << 1);
583         if (sqdist_right < this_sqdist_line[x]) {
584           this_sqdist_line[x] = sqdist_right;
585           ++right_dist.vec.x;
586           dist_line[x] = right_dist;
587           assert(cmap_line[x] == 0 || cmap_line[x + 1] != 0);
588           cmap_line[x] = cmap_line[x + 1];
589         }
590       }
591       // Propagate from bottom.
592       Distance bottom_dist = dist_line[x + width];
593       if (bottom_dist != special_distance) {
594         uint32_t sqdist_bottom = prev_sqdist_line[x];
595         sqdist_bottom += VERTICAL_SCALE_SQ + 2 * VERTICAL_SCALE_SQ * int(bottom_dist.vec.y);
596         if (sqdist_bottom < this_sqdist_line[x]) {
597           this_sqdist_line[x] = sqdist_bottom;
598           ++bottom_dist.vec.y;
599           dist_line[x] = bottom_dist;
600           assert(cmap_line[x] == 0 || cmap_line[x + width] != 0);
601           cmap_line[x] = cmap_line[x + width];
602         }
603       }
604     }
605 
606     // Left to right scan.
607     for (int x = 1; x < width - 1; ++x) {
608       if (dist_line[x] == special_distance) {
609         continue;
610       }
611       // Propagate from left.
612       Distance left_dist = dist_line[x - 1];
613       if (left_dist != special_distance) {
614         uint32_t sqdist_left = this_sqdist_line[x - 1];
615         sqdist_left += 1 - (int(left_dist.vec.x) << 1);
616         if (sqdist_left < this_sqdist_line[x]) {
617           this_sqdist_line[x] = sqdist_left;
618           --left_dist.vec.x;
619           dist_line[x] = left_dist;
620           assert(cmap_line[x] == 0 || cmap_line[x - 1] != 0);
621           cmap_line[x] = cmap_line[x - 1];
622         }
623       }
624     }
625 
626     std::swap(this_sqdist_line, prev_sqdist_line);
627   }
628 }  // voronoiSpecial
629 
630 /**
631  * Calculate the minimum distance between components from neighboring
632  * Voronoi segments.
633  */
voronoiDistances(const ConnectivityMap & cmap,const std::vector<Distance> & distance_matrix,std::unordered_map<Connection,uint32_t,Connection::hash> & conns)634 void voronoiDistances(const ConnectivityMap& cmap,
635                       const std::vector<Distance>& distance_matrix,
636                       std::unordered_map<Connection, uint32_t, Connection::hash>& conns) {
637   const int width = cmap.size().width();
638   const int height = cmap.size().height();
639 
640   const int offsets[] = {-cmap.stride(), -1, 1, cmap.stride()};
641 
642   const uint32_t* const cmap_data = cmap.data();
643   const Distance* const distance_data = &distance_matrix[0] + width + 3;
644   for (int y = 0, offset = 0; y < height; ++y, offset += 2) {
645     for (int x = 0; x < width; ++x, ++offset) {
646       const uint32_t label = cmap_data[offset];
647       assert(label != 0);
648 
649       const int x1 = x + distance_data[offset].vec.x;
650       const int y1 = y + distance_data[offset].vec.y;
651 
652       for (int i : offsets) {
653         const int nbh_offset = offset + i;
654         const uint32_t nbh_label = cmap_data[nbh_offset];
655         if ((nbh_label == 0) || (nbh_label == label)) {
656           // label 0 can be encountered in
657           // padding lines.
658           continue;
659         }
660 
661         const int x2 = x + distance_data[nbh_offset].vec.x;
662         const int y2 = y + distance_data[nbh_offset].vec.y;
663         const int dx = x1 - x2;
664         const int dy = y1 - y2;
665         const uint32_t sqdist = dx * dx + dy * dy;
666 
667         updateDistance(conns, label, nbh_label, sqdist);
668       }
669     }
670   }
671 }  // voronoiDistances
672 
despeckleImpl(BinaryImage & image,const Dpi & dpi,const Settings & settings,const TaskStatus & status,DebugImages * const dbg)673 void despeckleImpl(BinaryImage& image,
674                    const Dpi& dpi,
675                    const Settings& settings,
676                    const TaskStatus& status,
677                    DebugImages* const dbg) {
678   ConnectivityMap cmap(image, CONN8);
679   if (cmap.maxLabel() == 0) {
680     // Completely white image?
681     return;
682   }
683 
684   status.throwIfCancelled();
685 
686   std::vector<Component> components(cmap.maxLabel() + 1);
687   std::vector<BoundingBox> bounding_boxes(cmap.maxLabel() + 1);
688 
689   const int width = image.width();
690   const int height = image.height();
691 
692   uint32_t* const cmap_data = cmap.data();
693 
694   // Count the number of pixels and a bounding rect of each component.
695   uint32_t* cmap_line = cmap_data;
696   const int cmap_stride = cmap.stride();
697   for (int y = 0; y < height; ++y) {
698     for (int x = 0; x < width; ++x) {
699       const uint32_t label = cmap_line[x];
700       ++components[label].num_pixels;
701       bounding_boxes[label].extend(x, y);
702     }
703     cmap_line += cmap_stride;
704   }
705 
706   status.throwIfCancelled();
707 
708   // Unify big components into one.
709   std::vector<uint32_t> remapping_table(components.size());
710   uint32_t unified_big_component = 0;
711   uint32_t next_avail_component = 1;
712   for (uint32_t label = 1; label <= cmap.maxLabel(); ++label) {
713     if ((bounding_boxes[label].width() < settings.bigObjectThreshold)
714         && (bounding_boxes[label].height() < settings.bigObjectThreshold)) {
715       components[next_avail_component] = components[label];
716       remapping_table[label] = next_avail_component;
717       ++next_avail_component;
718     } else {
719       if (unified_big_component == 0) {
720         unified_big_component = next_avail_component;
721         ++next_avail_component;
722         components[unified_big_component] = components[label];
723         // Set num_pixels to a large value so that canBeAttachedTo()
724         // always allows attaching to any such component.
725         components[unified_big_component].num_pixels = width * height;
726       }
727       remapping_table[label] = unified_big_component;
728     }
729   }
730   components.resize(next_avail_component);
731   std::vector<BoundingBox>().swap(bounding_boxes);  // We don't need them any more.
732   status.throwIfCancelled();
733 
734   const uint32_t max_label = next_avail_component - 1;
735   // Remapping individual pixels.
736   cmap_line = cmap_data;
737   for (int y = 0; y < height; ++y) {
738     for (int x = 0; x < width; ++x) {
739       cmap_line[x] = remapping_table[cmap_line[x]];
740     }
741     cmap_line += cmap_stride;
742   }
743   if (dbg) {
744     dbg->add(cmap.visualized(), "big_components_unified");
745   }
746 
747   status.throwIfCancelled();
748   // Build a Voronoi diagram.
749   std::vector<Distance> distance_matrix;
750   voronoi(cmap, distance_matrix);
751   if (dbg) {
752     dbg->add(cmap.visualized(), "voronoi");
753   }
754 
755   status.throwIfCancelled();
756 
757   Distance* const distance_data = &distance_matrix[0] + width + 3;
758 
759   // Now build a bidirectional map of distances between neighboring
760   // connected components.
761 
762   typedef std::unordered_map<Connection, uint32_t, Connection::hash> Connections;  // conn -> sqdist
763   Connections conns;
764 
765   voronoiDistances(cmap, distance_matrix, conns);
766 
767   status.throwIfCancelled();
768 
769   // Tag connected components with ANCHORED_TO_BIG or ANCHORED_TO_SMALL.
770   for (const Connections::value_type& pair : conns) {
771     const Connection conn(pair.first);
772     const uint32_t sqdist = pair.second;
773     Component& comp1 = components[conn.lesser_label];
774     Component& comp2 = components[conn.greater_label];
775     tagSourceComponent(comp1, comp2, sqdist, settings);
776     tagSourceComponent(comp2, comp1, sqdist, settings);
777   }
778 
779   // Prevent it from growing when we compute the Voronoi diagram
780   // the second time.
781   components[unified_big_component].setAnchoredToBig();
782 
783   bool have_anchored_to_small_but_not_big = false;
784   for (const Component& comp : components) {
785     have_anchored_to_small_but_not_big = comp.anchoredToSmallButNotBig();
786   }
787 
788   if (have_anchored_to_small_but_not_big) {
789     status.throwIfCancelled();
790 
791     // Give such components a second chance.  Maybe they do have
792     // big neighbors, but Voronoi regions from a smaller ones
793     // block the path to the bigger ones.
794 
795     const Distance zero_distance(Distance::zero());
796     const Distance special_distance(Distance::special());
797     for (int y = 0, offset = 0; y < height; ++y, offset += 2) {
798       for (int x = 0; x < width; ++x, ++offset) {
799         const uint32_t label = cmap_data[offset];
800         assert(label != 0);
801 
802         const Component& comp = components[label];
803         if (!comp.anchoredToSmallButNotBig()) {
804           if (distance_data[offset] == zero_distance) {
805             // Prevent this region from growing
806             // and from being taken over by another
807             // by another region.
808             distance_data[offset] = special_distance;
809           } else {
810             // Allow this region to be taken over by others.
811             // Note: x + 1 here is equivalent to x
812             // in voronoi() or voronoiSpecial().
813             distance_data[offset].reset(x + 1);
814           }
815         }
816       }
817     }
818 
819     status.throwIfCancelled();
820 
821     // Calculate the Voronoi diagram again, but this time
822     // treat pixels with a special distance in such a way
823     // to prevent them from spreading but also preventing
824     // them from being overwritten.
825     voronoiSpecial(cmap, distance_matrix, special_distance);
826     if (dbg) {
827       dbg->add(cmap.visualized(), "voronoi_special");
828     }
829 
830     status.throwIfCancelled();
831 
832     // We've got new connections.  Add them to the map.
833     voronoiDistances(cmap, distance_matrix, conns);
834   }
835 
836   status.throwIfCancelled();
837 
838   // Clear the distance matrix.
839   std::vector<Distance>().swap(distance_matrix);
840 
841   // Remove tags from components.
842   for (Component& comp : components) {
843     comp.clearTags();
844   }
845   // Build a directional connection map and only include
846   // good connections, that is those with a small enough
847   // distance.
848   // While at it, clear the bidirectional connection map.
849   std::vector<TargetSourceConn> target_source;
850   while (!conns.empty()) {
851     const auto it(conns.begin());
852     const uint32_t label1 = it->first.lesser_label;
853     const uint32_t label2 = it->first.greater_label;
854     const uint32_t sqdist = it->second;
855     const Component& comp1 = components[label1];
856     const Component& comp2 = components[label2];
857     if (canBeAttachedTo(comp1, comp2, sqdist, settings)) {
858       target_source.emplace_back(label2, label1);
859     }
860     if (canBeAttachedTo(comp2, comp1, sqdist, settings)) {
861       target_source.emplace_back(label1, label2);
862     }
863     conns.erase(it);
864   }
865 
866   std::sort(target_source.begin(), target_source.end());
867 
868   status.throwIfCancelled();
869 
870   // Create an index for quick access to a group of connections
871   // with a specified target.
872   std::vector<size_t> target_source_idx;
873   const size_t num_target_sources = target_source.size();
874   uint32_t prev_label = uint32_t(0) - 1;
875   for (size_t i = 0; i < num_target_sources; ++i) {
876     const TargetSourceConn& conn = target_source[i];
877     assert(conn.target != 0);
878     for (; prev_label != conn.target; ++prev_label) {
879       target_source_idx.push_back(i);
880     }
881     assert(target_source_idx.size() - 1 == conn.target);
882   }
883   for (auto label = static_cast<uint32_t>(target_source_idx.size()); label <= max_label; ++label) {
884     target_source_idx.push_back(num_target_sources);
885   }
886   // Labels of components that are to be retained.
887   FastQueue<uint32_t> ok_labels;
888   ok_labels.push(unified_big_component);
889 
890   while (!ok_labels.empty()) {
891     const uint32_t label = ok_labels.front();
892     ok_labels.pop();
893 
894     Component& comp = components[label];
895     if (comp.anchoredToBig()) {
896       continue;
897     }
898 
899     comp.setAnchoredToBig();
900 
901     size_t idx = target_source_idx[label];
902     while (idx < num_target_sources && target_source[idx].target == label) {
903       ok_labels.push(target_source[idx].source);
904       ++idx;
905     }
906   }
907 
908   status.throwIfCancelled();
909   // Remove unmarked components from the binary image.
910   const uint32_t msb = uint32_t(1) << 31;
911   uint32_t* image_line = image.data();
912   const int image_stride = image.wordsPerLine();
913   cmap_line = cmap_data;
914   for (int y = 0; y < height; ++y) {
915     for (int x = 0; x < width; ++x) {
916       if (!components[cmap_line[x]].anchoredToBig()) {
917         image_line[x >> 5] &= ~(msb >> (x & 31));
918       }
919     }
920     image_line += image_stride;
921     cmap_line += cmap_stride;
922   }
923 }
924 }  // namespace
925 
despeckle(const BinaryImage & src,const Dpi & dpi,const Level level,const TaskStatus & status,DebugImages * const dbg)926 BinaryImage Despeckle::despeckle(const BinaryImage& src,
927                                  const Dpi& dpi,
928                                  const Level level,
929                                  const TaskStatus& status,
930                                  DebugImages* const dbg) {
931   BinaryImage dst(src);
932   despeckleInPlace(dst, dpi, level, status, dbg);
933 
934   return dst;
935 }
936 
despeckleInPlace(BinaryImage & image,const Dpi & dpi,const Level level,const TaskStatus & status,DebugImages * const dbg)937 void Despeckle::despeckleInPlace(BinaryImage& image,
938                                  const Dpi& dpi,
939                                  const Level level,
940                                  const TaskStatus& status,
941                                  DebugImages* const dbg) {
942   const Settings settings = Settings::get(level, dpi);
943   despeckleImpl(image, dpi, settings, status, dbg);
944 }
945 
despeckle(const imageproc::BinaryImage & src,const Dpi & dpi,const double level,const TaskStatus & status,DebugImages * dbg)946 imageproc::BinaryImage Despeckle::despeckle(const imageproc::BinaryImage& src,
947                                             const Dpi& dpi,
948                                             const double level,
949                                             const TaskStatus& status,
950                                             DebugImages* dbg) {
951   BinaryImage dst(src);
952   despeckleInPlace(dst, dpi, level, status, dbg);
953 
954   return dst;
955 }
956 
despeckleInPlace(imageproc::BinaryImage & image,const Dpi & dpi,const double level,const TaskStatus & status,DebugImages * dbg)957 void Despeckle::despeckleInPlace(imageproc::BinaryImage& image,
958                                  const Dpi& dpi,
959                                  const double level,
960                                  const TaskStatus& status,
961                                  DebugImages* dbg) {
962   const Settings settings = Settings::get(level, dpi);
963   despeckleImpl(image, dpi, settings, status, dbg);
964 }
965 // Despeckle::despeckleInPlace
966