1 /*
2     Scan Tailor - Interactive post-processing tool for scanned pages.
3     Copyright (C)  Joseph Artsimovich <joseph.artsimovich@gmail.com>
4 
5     This program is free software: you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation, either version 3 of the License, or
8     (at your option) any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "TextLineRefiner.h"
20 #include <QDebug>
21 #include <QPainter>
22 #include <boost/foreach.hpp>
23 #include <boost/lambda/bind.hpp>
24 #include <boost/lambda/lambda.hpp>
25 #include <cmath>
26 #include "DebugImages.h"
27 #include "NumericTraits.h"
28 #include "imageproc/GaussBlur.h"
29 #include "imageproc/Sobel.h"
30 
31 using namespace imageproc;
32 
33 namespace dewarping {
34 class TextLineRefiner::SnakeLength {
35  public:
36   explicit SnakeLength(const Snake& snake);
37 
totalLength() const38   float totalLength() const { return m_totalLength; }
39 
avgSegmentLength() const40   float avgSegmentLength() const { return m_avgSegmentLength; }
41 
arcLengthAt(size_t node_idx) const42   float arcLengthAt(size_t node_idx) const { return m_integralLength[node_idx]; }
43 
arcLengthFractionAt(size_t node_idx) const44   float arcLengthFractionAt(size_t node_idx) const { return m_integralLength[node_idx] * m_reciprocalTotalLength; }
45 
lengthFromTo(size_t from_node_idx,size_t to_node_idx) const46   float lengthFromTo(size_t from_node_idx, size_t to_node_idx) const {
47     return m_integralLength[to_node_idx] - m_integralLength[from_node_idx];
48   }
49 
50  private:
51   std::vector<float> m_integralLength;
52   float m_totalLength;
53   float m_reciprocalTotalLength;
54   float m_avgSegmentLength;
55 };
56 
57 
58 struct TextLineRefiner::FrenetFrame {
59   Vec2f unitTangent;
60   Vec2f unitDownNormal;
61 };
62 
63 
64 class TextLineRefiner::Optimizer {
65  public:
66   Optimizer(const Snake& snake, const Vec2f& unit_down_vec, float factor);
67 
68   bool thicknessAdjustment(Snake& snake, const Grid<float>& gradient);
69 
70   bool tangentMovement(Snake& snake, const Grid<float>& gradient);
71 
72   bool normalMovement(Snake& snake, const Grid<float>& gradient);
73 
74  private:
75   static float calcExternalEnergy(const Grid<float>& gradient, const SnakeNode& node, Vec2f down_normal);
76 
77   static float calcElasticityEnergy(const SnakeNode& node1, const SnakeNode& node2, float avg_dist);
78 
79   static float calcBendingEnergy(const SnakeNode& node, const SnakeNode& prev_node, const SnakeNode& prev_prev_node);
80 
81   static const float m_elasticityWeight;
82   static const float m_bendingWeight;
83   static const float m_topExternalWeight;
84   static const float m_bottomExternalWeight;
85   const float m_factor;
86   SnakeLength m_snakeLength;
87   std::vector<FrenetFrame> m_frenetFrames;
88 };
89 
90 
TextLineRefiner(const GrayImage & image,const Dpi & dpi,const Vec2f & unit_down_vector)91 TextLineRefiner::TextLineRefiner(const GrayImage& image, const Dpi& dpi, const Vec2f& unit_down_vector)
92     : m_image(image), m_dpi(dpi), m_unitDownVec(unit_down_vector) {}
93 
refine(std::list<std::vector<QPointF>> & polylines,const int iterations,DebugImages * dbg) const94 void TextLineRefiner::refine(std::list<std::vector<QPointF>>& polylines, const int iterations, DebugImages* dbg) const {
95   if (polylines.empty()) {
96     return;
97   }
98 
99   std::vector<Snake> snakes;
100   snakes.reserve(polylines.size());
101 
102   // Convert from polylines to snakes.
103   for (const std::vector<QPointF>& polyline : polylines) {
104     snakes.push_back(makeSnake(polyline, iterations));
105   }
106 
107   if (dbg) {
108     dbg->add(visualizeSnakes(snakes), "initial_snakes");
109   }
110 
111   Grid<float> gradient(m_image.width(), m_image.height(), /*padding=*/0);
112 
113   // Start with a rather strong blur.
114   float h_sigma = (4.0f / 200.f) * m_dpi.horizontal();
115   float v_sigma = (4.0f / 200.f) * m_dpi.vertical();
116   calcBlurredGradient(gradient, h_sigma, v_sigma);
117 
118   for (Snake& snake : snakes) {
119     evolveSnake(snake, gradient, ON_CONVERGENCE_STOP);
120   }
121   if (dbg) {
122     dbg->add(visualizeSnakes(snakes, &gradient), "evolved_snakes1");
123   }
124 
125   // Less blurring this time.
126   h_sigma *= 0.5f;
127   v_sigma *= 0.5f;
128   calcBlurredGradient(gradient, h_sigma, v_sigma);
129 
130   for (Snake& snake : snakes) {
131     evolveSnake(snake, gradient, ON_CONVERGENCE_GO_FINER);
132   }
133   if (dbg) {
134     dbg->add(visualizeSnakes(snakes, &gradient), "evolved_snakes2");
135   }
136 
137   // Convert from snakes back to polylines.
138   int i = -1;
139   for (std::vector<QPointF>& polyline : polylines) {
140     ++i;
141     const Snake& snake = snakes[i];
142     polyline.clear();
143     for (const SnakeNode& node : snake.nodes) {
144       polyline.push_back(node.center);
145     }
146   }
147 }  // TextLineRefiner::refine
148 
calcBlurredGradient(Grid<float> & gradient,float h_sigma,float v_sigma) const149 void TextLineRefiner::calcBlurredGradient(Grid<float>& gradient, float h_sigma, float v_sigma) const {
150   using namespace boost::lambda;
151 
152   const float downscale = 1.0f / (255.0f * 8.0f);
153   Grid<float> vert_grad(m_image.width(), m_image.height(), /*padding=*/0);
154   horizontalSobel<float>(m_image.width(), m_image.height(), m_image.data(), m_image.stride(), _1 * downscale,
155                          gradient.data(), gradient.stride(), _1 = _2, _1, gradient.data(), gradient.stride(), _1 = _2);
156   verticalSobel<float>(m_image.width(), m_image.height(), m_image.data(), m_image.stride(), _1 * downscale,
157                        vert_grad.data(), vert_grad.stride(), _1 = _2, _1, gradient.data(), gradient.stride(),
158                        _1 = _1 * m_unitDownVec[0] + _2 * m_unitDownVec[1]);
159   Grid<float>().swap(vert_grad);  // Save memory.
160   gaussBlurGeneric(m_image.size(), h_sigma, v_sigma, gradient.data(), gradient.stride(), _1, gradient.data(),
161                    gradient.stride(), _1 = _2);
162 }
163 
externalEnergyAt(const Grid<float> & gradient,const Vec2f & pos,float penalty_if_outside)164 float TextLineRefiner::externalEnergyAt(const Grid<float>& gradient, const Vec2f& pos, float penalty_if_outside) {
165   const auto x_base = static_cast<const float>(std::floor(pos[0]));
166   const auto y_base = static_cast<const float>(std::floor(pos[1]));
167   const auto x_base_i = (int) x_base;
168   const auto y_base_i = (int) y_base;
169 
170   if ((x_base_i < 0) || (y_base_i < 0) || (x_base_i + 1 >= gradient.width()) || (y_base_i + 1 >= gradient.height())) {
171     return penalty_if_outside;
172   }
173 
174   const float x = pos[0] - x_base;
175   const float y = pos[1] - y_base;
176   const float x1 = 1.0f - x;
177   const float y1 = 1.0f - y;
178 
179   const int stride = gradient.stride();
180   const float* base = gradient.data() + y_base_i * stride + x_base_i;
181 
182   return base[0] * x1 * y1 + base[1] * x * y1 + base[stride] * x1 * y + base[stride + 1] * x * y;
183 }
184 
makeSnake(const std::vector<QPointF> & polyline,const int iterations)185 TextLineRefiner::Snake TextLineRefiner::makeSnake(const std::vector<QPointF>& polyline, const int iterations) {
186   float total_length = 0;
187 
188   const size_t polyline_size = polyline.size();
189   for (size_t i = 1; i < polyline_size; ++i) {
190     total_length += std::sqrt(Vec2f(polyline[i] - polyline[i - 1]).squaredNorm());
191   }
192 
193   const auto points_in_snake = static_cast<const int>(total_length / 20);
194   Snake snake;
195   snake.iterationsRemaining = iterations;
196 
197   int points_inserted = 0;
198   float base_t = 0;
199   float next_insert_t = 0;
200   for (size_t i = 1; i < polyline_size; ++i) {
201     const Vec2f base(polyline[i - 1]);
202     const Vec2f vec((polyline[i] - base));
203     const auto next_t = static_cast<const float>(base_t + std::sqrt(vec.squaredNorm()));
204 
205     while (next_t >= next_insert_t) {
206       const float fraction = (next_insert_t - base_t) / (next_t - base_t);
207       SnakeNode node;
208       node.center = base + fraction * vec;
209       node.ribHalfLength = 4;
210       snake.nodes.push_back(node);
211       ++points_inserted;
212       next_insert_t = total_length * points_inserted / (points_in_snake - 1);
213     }
214 
215     base_t = next_t;
216   }
217 
218   return snake;
219 }  // TextLineRefiner::makeSnake
220 
calcFrenetFrames(std::vector<FrenetFrame> & frenet_frames,const Snake & snake,const SnakeLength & snake_length,const Vec2f & unit_down_vec)221 void TextLineRefiner::calcFrenetFrames(std::vector<FrenetFrame>& frenet_frames,
222                                        const Snake& snake,
223                                        const SnakeLength& snake_length,
224                                        const Vec2f& unit_down_vec) {
225   const size_t num_nodes = snake.nodes.size();
226   frenet_frames.resize(num_nodes);
227 
228   if (num_nodes == 0) {
229     return;
230   } else if (num_nodes == 1) {
231     frenet_frames[0].unitTangent = Vec2f();
232     frenet_frames[0].unitDownNormal = Vec2f();
233 
234     return;
235   }
236 
237   // First segment.
238   Vec2f first_segment(snake.nodes[1].center - snake.nodes[0].center);
239   const float first_segment_len = snake_length.arcLengthAt(1);
240   if (first_segment_len > std::numeric_limits<float>::epsilon()) {
241     first_segment /= first_segment_len;
242     frenet_frames.front().unitTangent = first_segment;
243   }
244   // Segments between first and last, exclusive.
245   Vec2f prev_segment(first_segment);
246   for (size_t i = 1; i < num_nodes - 1; ++i) {
247     Vec2f next_segment(snake.nodes[i + 1].center - snake.nodes[i].center);
248     const float next_segment_len = snake_length.lengthFromTo(i, i + 1);
249     if (next_segment_len > std::numeric_limits<float>::epsilon()) {
250       next_segment /= next_segment_len;
251     }
252 
253     Vec2f tangent_vec(0.5 * (prev_segment + next_segment));
254     const auto len = static_cast<const float>(std::sqrt(tangent_vec.squaredNorm()));
255     if (len > std::numeric_limits<float>::epsilon()) {
256       tangent_vec /= len;
257     }
258     frenet_frames[i].unitTangent = tangent_vec;
259 
260     prev_segment = next_segment;
261   }
262 
263   // Last segments.
264   Vec2f last_segment(snake.nodes[num_nodes - 1].center - snake.nodes[num_nodes - 2].center);
265   const float last_segment_len = snake_length.lengthFromTo(num_nodes - 2, num_nodes - 1);
266   if (last_segment_len > std::numeric_limits<float>::epsilon()) {
267     last_segment /= last_segment_len;
268     frenet_frames.back().unitTangent = last_segment;
269   }
270 
271   // Calculate normals and make sure they point down.
272   for (FrenetFrame& frame : frenet_frames) {
273     frame.unitDownNormal = Vec2f(frame.unitTangent[1], -frame.unitTangent[0]);
274     if (frame.unitDownNormal.dot(unit_down_vec) < 0) {
275       frame.unitDownNormal = -frame.unitDownNormal;
276     }
277   }
278 }  // TextLineRefiner::calcFrenetFrames
279 
evolveSnake(Snake & snake,const Grid<float> & gradient,const OnConvergence on_convergence) const280 void TextLineRefiner::evolveSnake(Snake& snake, const Grid<float>& gradient, const OnConvergence on_convergence) const {
281   float factor = 1.0f;
282 
283   while (snake.iterationsRemaining > 0) {
284     --snake.iterationsRemaining;
285 
286     Optimizer optimizer(snake, m_unitDownVec, factor);
287     bool changed = false;
288     changed |= optimizer.thicknessAdjustment(snake, gradient);
289     changed |= optimizer.tangentMovement(snake, gradient);
290     changed |= optimizer.normalMovement(snake, gradient);
291 
292     if (!changed) {
293       // qDebug() << "Converged.  Iterations remaining = " << snake.iterationsRemaining;
294       if (on_convergence == ON_CONVERGENCE_STOP) {
295         break;
296       } else {
297         factor *= 0.5f;
298       }
299     }
300   }
301 }
302 
visualizeGradient(const Grid<float> & gradient) const303 QImage TextLineRefiner::visualizeGradient(const Grid<float>& gradient) const {
304   const int width = gradient.width();
305   const int height = gradient.height();
306   const int gradient_stride = gradient.stride();
307   // First let's find the maximum and minimum values.
308   float min_value = NumericTraits<float>::max();
309   float max_value = NumericTraits<float>::min();
310 
311   const float* gradient_line = gradient.data();
312   for (int y = 0; y < height; ++y) {
313     for (int x = 0; x < width; ++x) {
314       const float value = gradient_line[x];
315       if (value < min_value) {
316         min_value = value;
317       } else if (value > max_value) {
318         max_value = value;
319       }
320     }
321     gradient_line += gradient_stride;
322   }
323 
324   float scale = std::max(max_value, -min_value);
325   if (scale > std::numeric_limits<float>::epsilon()) {
326     scale = 255.0f / scale;
327   }
328 
329   QImage overlay(width, height, QImage::Format_ARGB32_Premultiplied);
330   auto* overlay_line = (uint32_t*) overlay.bits();
331   const int overlay_stride = overlay.bytesPerLine() / 4;
332 
333   gradient_line = gradient.data();
334   for (int y = 0; y < height; ++y) {
335     for (int x = 0; x < width; ++x) {
336       const float value = gradient_line[x] * scale;
337       const int magnitude = qBound(0, static_cast<const int&>(std::round(std::fabs(value))), 255);
338       if (value > 0) {
339         // Red for positive gradients which indicate bottom edges.
340         overlay_line[x] = qRgba(magnitude, 0, 0, magnitude);
341       } else {
342         overlay_line[x] = qRgba(0, 0, magnitude, magnitude);
343       }
344     }
345     gradient_line += gradient_stride;
346     overlay_line += overlay_stride;
347   }
348 
349   QImage canvas(m_image.toQImage().convertToFormat(QImage::Format_ARGB32_Premultiplied));
350   QPainter painter(&canvas);
351   painter.drawImage(0, 0, overlay);
352 
353   return canvas;
354 }  // TextLineRefiner::visualizeGradient
355 
visualizeSnakes(const std::vector<Snake> & snakes,const Grid<float> * gradient) const356 QImage TextLineRefiner::visualizeSnakes(const std::vector<Snake>& snakes, const Grid<float>* gradient) const {
357   QImage canvas;
358   if (gradient) {
359     canvas = visualizeGradient(*gradient);
360   } else {
361     canvas = m_image.toQImage().convertToFormat(QImage::Format_ARGB32_Premultiplied);
362   }
363 
364   QPainter painter(&canvas);
365   painter.setRenderHint(QPainter::Antialiasing);
366 
367   QPen top_pen(QColor(0, 0, 255));
368   top_pen.setWidthF(1.5);
369 
370   QPen bottom_pen(QColor(255, 0, 0));
371   bottom_pen.setWidthF(1.5);
372 
373   QPen middle_pen(QColor(255, 0, 255));
374   middle_pen.setWidth(static_cast<int>(1.5));
375 
376   QBrush knot_brush(QColor(255, 255, 0, 180));
377   painter.setBrush(knot_brush);
378 
379   QRectF knot_rect(0, 0, 7, 7);
380   std::vector<FrenetFrame> frenet_frames;
381 
382   for (const Snake& snake : snakes) {
383     const SnakeLength snake_length(snake);
384     calcFrenetFrames(frenet_frames, snake, snake_length, m_unitDownVec);
385     QVector<QPointF> top_polyline;
386     QVector<QPointF> middle_polyline;
387     QVector<QPointF> bottom_polyline;
388 
389     const size_t num_nodes = snake.nodes.size();
390     for (size_t i = 0; i < num_nodes; ++i) {
391       const QPointF mid(snake.nodes[i].center + QPointF(0.5, 0.5));
392       const QPointF top(mid - snake.nodes[i].ribHalfLength * frenet_frames[i].unitDownNormal);
393       const QPointF bottom(mid + snake.nodes[i].ribHalfLength * frenet_frames[i].unitDownNormal);
394       top_polyline << top;
395       middle_polyline << mid;
396       bottom_polyline << bottom;
397     }
398 
399     // Draw polylines.
400     painter.setPen(top_pen);
401     painter.drawPolyline(top_polyline);
402 
403     painter.setPen(bottom_pen);
404     painter.drawPolyline(bottom_polyline);
405 
406     painter.setPen(middle_pen);
407     painter.drawPolyline(middle_polyline);
408 
409     // Draw knots.
410     painter.setPen(Qt::NoPen);
411     for (const QPointF& pt : middle_polyline) {
412       knot_rect.moveCenter(pt);
413       painter.drawEllipse(knot_rect);
414     }
415   }
416 
417   return canvas;
418 }  // TextLineRefiner::visualizeSnakes
419 
420 /*============================ SnakeLength =============================*/
421 
SnakeLength(const Snake & snake)422 TextLineRefiner::SnakeLength::SnakeLength(const Snake& snake)
423     : m_integralLength(snake.nodes.size()), m_totalLength(), m_reciprocalTotalLength(), m_avgSegmentLength() {
424   const size_t num_nodes = snake.nodes.size();
425   float arc_length_accum = 0;
426   for (size_t i = 1; i < num_nodes; ++i) {
427     const Vec2f vec(snake.nodes[i].center - snake.nodes[i - 1].center);
428     arc_length_accum += std::sqrt(vec.squaredNorm());
429     m_integralLength[i] = arc_length_accum;
430   }
431   m_totalLength = arc_length_accum;
432   if (m_totalLength > std::numeric_limits<float>::epsilon()) {
433     m_reciprocalTotalLength = 1.0f / m_totalLength;
434   }
435   if (num_nodes > 1) {
436     m_avgSegmentLength = m_totalLength / (num_nodes - 1);
437   }
438 }
439 
440 /*=========================== Optimizer =============================*/
441 
442 const float TextLineRefiner::Optimizer::m_elasticityWeight = 0.2f;
443 const float TextLineRefiner::Optimizer::m_bendingWeight = 1.8f;
444 const float TextLineRefiner::Optimizer::m_topExternalWeight = 1.0f;
445 const float TextLineRefiner::Optimizer::m_bottomExternalWeight = 1.0f;
446 
Optimizer(const Snake & snake,const Vec2f & unit_down_vec,float factor)447 TextLineRefiner::Optimizer::Optimizer(const Snake& snake, const Vec2f& unit_down_vec, float factor)
448     : m_factor(factor), m_snakeLength(snake) {
449   calcFrenetFrames(m_frenetFrames, snake, m_snakeLength, unit_down_vec);
450 }
451 
thicknessAdjustment(Snake & snake,const Grid<float> & gradient)452 bool TextLineRefiner::Optimizer::thicknessAdjustment(Snake& snake, const Grid<float>& gradient) {
453   const size_t num_nodes = snake.nodes.size();
454 
455   const float rib_adjustments[] = {0.0f * m_factor, 0.5f * m_factor, -0.5f * m_factor};
456   enum { NUM_RIB_ADJUSTMENTS = sizeof(rib_adjustments) / sizeof(rib_adjustments[0]) };
457 
458   int best_i = 0;
459   int best_j = 0;
460   float best_cost = NumericTraits<float>::max();
461   for (int i = 0; i < NUM_RIB_ADJUSTMENTS; ++i) {
462     const float head_rib = snake.nodes.front().ribHalfLength + rib_adjustments[i];
463     if (head_rib <= std::numeric_limits<float>::epsilon()) {
464       continue;
465     }
466 
467     for (int j = 0; j < NUM_RIB_ADJUSTMENTS; ++j) {
468       const float tail_rib = snake.nodes.back().ribHalfLength + rib_adjustments[j];
469       if (tail_rib <= std::numeric_limits<float>::epsilon()) {
470         continue;
471       }
472 
473       float cost = 0;
474       for (size_t node_idx = 0; node_idx < num_nodes; ++node_idx) {
475         const float t = m_snakeLength.arcLengthFractionAt(node_idx);
476         const float rib = head_rib + t * (tail_rib - head_rib);
477         const Vec2f down_normal(m_frenetFrames[node_idx].unitDownNormal);
478 
479         SnakeNode node(snake.nodes[node_idx]);
480         node.ribHalfLength = rib;
481         cost += calcExternalEnergy(gradient, node, down_normal);
482       }
483       if (cost < best_cost) {
484         best_cost = cost;
485         best_i = i;
486         best_j = j;
487       }
488     }
489   }
490   const float head_rib = snake.nodes.front().ribHalfLength + rib_adjustments[best_i];
491   const float tail_rib = snake.nodes.back().ribHalfLength + rib_adjustments[best_j];
492   for (size_t node_idx = 0; node_idx < num_nodes; ++node_idx) {
493     const float t = m_snakeLength.arcLengthFractionAt(node_idx);
494     snake.nodes[node_idx].ribHalfLength = head_rib + t * (tail_rib - head_rib);
495     // Note that we need to recalculate inner ribs even if outer ribs
496     // didn't change, as movement of ribs in tangent direction affects
497     // interpolation.
498   }
499 
500   return rib_adjustments[best_i] != 0 || rib_adjustments[best_j] != 0;
501 }  // TextLineRefiner::Optimizer::thicknessAdjustment
502 
tangentMovement(Snake & snake,const Grid<float> & gradient)503 bool TextLineRefiner::Optimizer::tangentMovement(Snake& snake, const Grid<float>& gradient) {
504   const size_t num_nodes = snake.nodes.size();
505   if (num_nodes < 3) {
506     return false;
507   }
508 
509   const float tangent_movements[] = {0.0f * m_factor, 1.0f * m_factor, -1.0f * m_factor};
510   enum { NUM_TANGENT_MOVEMENTS = sizeof(tangent_movements) / sizeof(tangent_movements[0]) };
511 
512   std::vector<uint32_t> paths;
513   std::vector<uint32_t> new_paths;
514   std::vector<Step> step_storage;
515   // Note that we don't move the first and the last node in tangent direction.
516   paths.push_back(static_cast<unsigned int&&>(step_storage.size()));
517   step_storage.emplace_back();
518   step_storage.back().prevStepIdx = ~uint32_t(0);
519   step_storage.back().node = snake.nodes.front();
520   step_storage.back().pathCost = 0;
521 
522   for (size_t node_idx = 1; node_idx < num_nodes - 1; ++node_idx) {
523     const Vec2f initial_pos(snake.nodes[node_idx].center);
524     const float rib = snake.nodes[node_idx].ribHalfLength;
525     const Vec2f unit_tangent(m_frenetFrames[node_idx].unitTangent);
526     const Vec2f down_normal(m_frenetFrames[node_idx].unitDownNormal);
527 
528     for (float tangent_movement : tangent_movements) {
529       Step step;
530       step.prevStepIdx = ~uint32_t(0);
531       step.node.center = initial_pos + tangent_movement * unit_tangent;
532       step.node.ribHalfLength = rib;
533       step.pathCost = NumericTraits<float>::max();
534 
535       float base_cost = calcExternalEnergy(gradient, step.node, down_normal);
536 
537       if (node_idx == num_nodes - 2) {
538         // Take into account the distance to the last node as well.
539         base_cost += calcElasticityEnergy(step.node, snake.nodes.back(), m_snakeLength.avgSegmentLength());
540       }
541 
542       // Now find the best step for the previous node to combine with.
543       for (uint32_t prev_step_idx : paths) {
544         const Step& prev_step = step_storage[prev_step_idx];
545         const float cost = base_cost + prev_step.pathCost
546                            + calcElasticityEnergy(step.node, prev_step.node, m_snakeLength.avgSegmentLength());
547 
548         if (cost < step.pathCost) {
549           step.pathCost = cost;
550           step.prevStepIdx = prev_step_idx;
551         }
552       }
553       assert(step.prevStepIdx != ~uint32_t(0));
554       new_paths.push_back(static_cast<unsigned int&&>(step_storage.size()));
555       step_storage.push_back(step);
556     }
557     assert(!new_paths.empty());
558     paths.swap(new_paths);
559     new_paths.clear();
560   }
561 
562   // Find the best overall path.
563   uint32_t best_path_idx = ~uint32_t(0);
564   float best_cost = NumericTraits<float>::max();
565   for (uint32_t last_step_idx : paths) {
566     const Step& step = step_storage[last_step_idx];
567     if (step.pathCost < best_cost) {
568       best_cost = step.pathCost;
569       best_path_idx = last_step_idx;
570     }
571   }
572   // Having found the best path, convert it back to a snake.
573   float max_sqdist = 0;
574   uint32_t step_idx = best_path_idx;
575   for (auto node_idx = static_cast<int>(num_nodes - 2); node_idx > 0; --node_idx) {
576     assert(step_idx != ~uint32_t(0));
577     const Step& step = step_storage[step_idx];
578     SnakeNode& node = snake.nodes[node_idx];
579 
580     const float sqdist = (node.center - step.node.center).squaredNorm();
581     max_sqdist = std::max<float>(max_sqdist, sqdist);
582 
583     node = step.node;
584     step_idx = step.prevStepIdx;
585   }
586 
587   return max_sqdist > std::numeric_limits<float>::epsilon();
588 }  // TextLineRefiner::Optimizer::tangentMovement
589 
normalMovement(Snake & snake,const Grid<float> & gradient)590 bool TextLineRefiner::Optimizer::normalMovement(Snake& snake, const Grid<float>& gradient) {
591   const size_t num_nodes = snake.nodes.size();
592   if (num_nodes < 3) {
593     return false;
594   }
595 
596   const float normal_movements[] = {0.0f * m_factor, 1.0f * m_factor, -1.0f * m_factor};
597   enum { NUM_NORMAL_MOVEMENTS = sizeof(normal_movements) / sizeof(normal_movements[0]) };
598 
599   std::vector<uint32_t> paths;
600   std::vector<uint32_t> new_paths;
601   std::vector<Step> step_storage;
602   // The first two nodes pose a problem for us.  These nodes don't have two predecessors,
603   // and therefore we can't take bending into the account.  We could take the followers
604   // instead of the ancestors, but then this follower is going to move itself, making
605   // our calculations less accurate.  The proper solution is to provide not N but N*N
606   // paths to the 3rd node, each path corresponding to a combination of movement of
607   // the first and the second node.  That's the approach we are taking here.
608   for (float normal_movement : normal_movements) {
609     const auto prev_step_idx = static_cast<const uint32_t>(step_storage.size());
610     {
611       // Movements of the first node.
612       const Vec2f down_normal(m_frenetFrames[0].unitDownNormal);
613       Step step;
614       step.node.center = snake.nodes[0].center + normal_movement * down_normal;
615       step.node.ribHalfLength = snake.nodes[0].ribHalfLength;
616       step.prevStepIdx = ~uint32_t(0);
617       step.pathCost = calcExternalEnergy(gradient, step.node, down_normal);
618 
619       step_storage.push_back(step);
620     }
621 
622     for (float j : normal_movements) {
623       // Movements of the second node.
624       const Vec2f down_normal(m_frenetFrames[1].unitDownNormal);
625 
626       Step step;
627       step.node.center = snake.nodes[1].center + j * down_normal;
628       step.node.ribHalfLength = snake.nodes[1].ribHalfLength;
629       step.prevStepIdx = prev_step_idx;
630       step.pathCost = step_storage[prev_step_idx].pathCost + calcExternalEnergy(gradient, step.node, down_normal);
631 
632       paths.push_back(static_cast<unsigned int&&>(step_storage.size()));
633       step_storage.push_back(step);
634     }
635   }
636 
637   for (size_t node_idx = 2; node_idx < num_nodes; ++node_idx) {
638     const SnakeNode& node = snake.nodes[node_idx];
639     const Vec2f down_normal(m_frenetFrames[node_idx].unitDownNormal);
640 
641     for (float normal_movement : normal_movements) {
642       Step step;
643       step.prevStepIdx = ~uint32_t(0);
644       step.node.center = node.center + normal_movement * down_normal;
645       step.node.ribHalfLength = node.ribHalfLength;
646       step.pathCost = NumericTraits<float>::max();
647 
648       const float base_cost = calcExternalEnergy(gradient, step.node, down_normal);
649 
650       // Now find the best step for the previous node to combine with.
651       for (uint32_t prev_step_idx : paths) {
652         const Step& prev_step = step_storage[prev_step_idx];
653         const Step& prev_prev_step = step_storage[prev_step.prevStepIdx];
654 
655         const float cost
656             = base_cost + prev_step.pathCost + calcBendingEnergy(step.node, prev_step.node, prev_prev_step.node);
657 
658         if (cost < step.pathCost) {
659           step.pathCost = cost;
660           step.prevStepIdx = prev_step_idx;
661         }
662       }
663       assert(step.prevStepIdx != ~uint32_t(0));
664       new_paths.push_back(static_cast<unsigned int&&>(step_storage.size()));
665       step_storage.push_back(step);
666     }
667     assert(!new_paths.empty());
668     paths.swap(new_paths);
669     new_paths.clear();
670   }
671 
672   // Find the best overall path.
673   uint32_t best_path_idx = ~uint32_t(0);
674   float best_cost = NumericTraits<float>::max();
675   for (uint32_t last_step_idx : paths) {
676     const Step& step = step_storage[last_step_idx];
677     if (step.pathCost < best_cost) {
678       best_cost = step.pathCost;
679       best_path_idx = last_step_idx;
680     }
681   }
682   // Having found the best path, convert it back to a snake.
683   float max_sqdist = 0;
684   uint32_t step_idx = best_path_idx;
685   for (auto node_idx = static_cast<int>(num_nodes - 1); node_idx >= 0; --node_idx) {
686     assert(step_idx != ~uint32_t(0));
687     const Step& step = step_storage[step_idx];
688     SnakeNode& node = snake.nodes[node_idx];
689 
690     const float sqdist = (node.center - step.node.center).squaredNorm();
691     max_sqdist = std::max<float>(max_sqdist, sqdist);
692 
693     node = step.node;
694     step_idx = step.prevStepIdx;
695   }
696 
697   return max_sqdist > std::numeric_limits<float>::epsilon();
698 }  // TextLineRefiner::Optimizer::normalMovement
699 
calcExternalEnergy(const Grid<float> & gradient,const SnakeNode & node,const Vec2f down_normal)700 float TextLineRefiner::Optimizer::calcExternalEnergy(const Grid<float>& gradient,
701                                                      const SnakeNode& node,
702                                                      const Vec2f down_normal) {
703   const Vec2f top(node.center - node.ribHalfLength * down_normal);
704   const Vec2f bottom(node.center + node.ribHalfLength * down_normal);
705 
706   const float top_grad = externalEnergyAt(gradient, top, 0.0f);
707   const float bottom_grad = externalEnergyAt(gradient, bottom, 0.0f);
708 
709   // Surprisingly, it turns out it's a bad idea to penalize for the opposite
710   // sign in the gradient.  Sometimes a snake's edge has to move over the
711   // "wrong" gradient ridge before it gets into a good position.
712   // Those std::min and std::max prevent such penalties.
713   const float top_energy = m_topExternalWeight * std::min<float>(top_grad, 0.0f);
714   const float bottom_energy = m_bottomExternalWeight * std::max<float>(bottom_grad, 0.0f);
715 
716   // Positive gradient indicates the bottom edge and vice versa.
717   // Note that negative energies are fine with us - the less the better.
718   return top_energy - bottom_energy;
719 }
720 
calcElasticityEnergy(const SnakeNode & node1,const SnakeNode & node2,float avg_dist)721 float TextLineRefiner::Optimizer::calcElasticityEnergy(const SnakeNode& node1, const SnakeNode& node2, float avg_dist) {
722   const Vec2f vec(node1.center - node2.center);
723   const auto vec_len = static_cast<const float>(std::sqrt(vec.squaredNorm()));
724 
725   if (vec_len < 1.0f) {
726     return 1000.0f;  // Penalty for moving too close to another node.
727   }
728 
729   const auto dist_diff = std::fabs(avg_dist - vec_len);
730 
731   return m_elasticityWeight * (dist_diff / avg_dist);
732 }
733 
calcBendingEnergy(const SnakeNode & node,const SnakeNode & prev_node,const SnakeNode & prev_prev_node)734 float TextLineRefiner::Optimizer::calcBendingEnergy(const SnakeNode& node,
735                                                     const SnakeNode& prev_node,
736                                                     const SnakeNode& prev_prev_node) {
737   const Vec2f vec(node.center - prev_node.center);
738   const auto vec_len = static_cast<const float>(std::sqrt(vec.squaredNorm()));
739 
740   if (vec_len < 1.0f) {
741     return 1000.0f;  // Penalty for moving too close to another node.
742   }
743 
744   const Vec2f prev_vec(prev_node.center - prev_prev_node.center);
745   const auto prev_vec_len = static_cast<const float>(std::sqrt(prev_vec.squaredNorm()));
746   if (prev_vec_len < 1.0f) {
747     return 1000.0f;  // Penalty for moving too close to another node.
748   }
749 
750   const Vec2f bend_vec(vec / vec_len - prev_vec / prev_vec_len);
751 
752   return m_bendingWeight * bend_vec.squaredNorm();
753 }
754 }  // namespace dewarping