1 /*
2 Scan Tailor - Interactive post-processing tool for scanned pages.
3 Copyright (C) Joseph Artsimovich <joseph.artsimovich@gmail.com>
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "TextLineRefiner.h"
20 #include <QDebug>
21 #include <QPainter>
22 #include <boost/foreach.hpp>
23 #include <boost/lambda/bind.hpp>
24 #include <boost/lambda/lambda.hpp>
25 #include <cmath>
26 #include "DebugImages.h"
27 #include "NumericTraits.h"
28 #include "imageproc/GaussBlur.h"
29 #include "imageproc/Sobel.h"
30
31 using namespace imageproc;
32
33 namespace dewarping {
34 class TextLineRefiner::SnakeLength {
35 public:
36 explicit SnakeLength(const Snake& snake);
37
totalLength() const38 float totalLength() const { return m_totalLength; }
39
avgSegmentLength() const40 float avgSegmentLength() const { return m_avgSegmentLength; }
41
arcLengthAt(size_t node_idx) const42 float arcLengthAt(size_t node_idx) const { return m_integralLength[node_idx]; }
43
arcLengthFractionAt(size_t node_idx) const44 float arcLengthFractionAt(size_t node_idx) const { return m_integralLength[node_idx] * m_reciprocalTotalLength; }
45
lengthFromTo(size_t from_node_idx,size_t to_node_idx) const46 float lengthFromTo(size_t from_node_idx, size_t to_node_idx) const {
47 return m_integralLength[to_node_idx] - m_integralLength[from_node_idx];
48 }
49
50 private:
51 std::vector<float> m_integralLength;
52 float m_totalLength;
53 float m_reciprocalTotalLength;
54 float m_avgSegmentLength;
55 };
56
57
58 struct TextLineRefiner::FrenetFrame {
59 Vec2f unitTangent;
60 Vec2f unitDownNormal;
61 };
62
63
64 class TextLineRefiner::Optimizer {
65 public:
66 Optimizer(const Snake& snake, const Vec2f& unit_down_vec, float factor);
67
68 bool thicknessAdjustment(Snake& snake, const Grid<float>& gradient);
69
70 bool tangentMovement(Snake& snake, const Grid<float>& gradient);
71
72 bool normalMovement(Snake& snake, const Grid<float>& gradient);
73
74 private:
75 static float calcExternalEnergy(const Grid<float>& gradient, const SnakeNode& node, Vec2f down_normal);
76
77 static float calcElasticityEnergy(const SnakeNode& node1, const SnakeNode& node2, float avg_dist);
78
79 static float calcBendingEnergy(const SnakeNode& node, const SnakeNode& prev_node, const SnakeNode& prev_prev_node);
80
81 static const float m_elasticityWeight;
82 static const float m_bendingWeight;
83 static const float m_topExternalWeight;
84 static const float m_bottomExternalWeight;
85 const float m_factor;
86 SnakeLength m_snakeLength;
87 std::vector<FrenetFrame> m_frenetFrames;
88 };
89
90
TextLineRefiner(const GrayImage & image,const Dpi & dpi,const Vec2f & unit_down_vector)91 TextLineRefiner::TextLineRefiner(const GrayImage& image, const Dpi& dpi, const Vec2f& unit_down_vector)
92 : m_image(image), m_dpi(dpi), m_unitDownVec(unit_down_vector) {}
93
refine(std::list<std::vector<QPointF>> & polylines,const int iterations,DebugImages * dbg) const94 void TextLineRefiner::refine(std::list<std::vector<QPointF>>& polylines, const int iterations, DebugImages* dbg) const {
95 if (polylines.empty()) {
96 return;
97 }
98
99 std::vector<Snake> snakes;
100 snakes.reserve(polylines.size());
101
102 // Convert from polylines to snakes.
103 for (const std::vector<QPointF>& polyline : polylines) {
104 snakes.push_back(makeSnake(polyline, iterations));
105 }
106
107 if (dbg) {
108 dbg->add(visualizeSnakes(snakes), "initial_snakes");
109 }
110
111 Grid<float> gradient(m_image.width(), m_image.height(), /*padding=*/0);
112
113 // Start with a rather strong blur.
114 float h_sigma = (4.0f / 200.f) * m_dpi.horizontal();
115 float v_sigma = (4.0f / 200.f) * m_dpi.vertical();
116 calcBlurredGradient(gradient, h_sigma, v_sigma);
117
118 for (Snake& snake : snakes) {
119 evolveSnake(snake, gradient, ON_CONVERGENCE_STOP);
120 }
121 if (dbg) {
122 dbg->add(visualizeSnakes(snakes, &gradient), "evolved_snakes1");
123 }
124
125 // Less blurring this time.
126 h_sigma *= 0.5f;
127 v_sigma *= 0.5f;
128 calcBlurredGradient(gradient, h_sigma, v_sigma);
129
130 for (Snake& snake : snakes) {
131 evolveSnake(snake, gradient, ON_CONVERGENCE_GO_FINER);
132 }
133 if (dbg) {
134 dbg->add(visualizeSnakes(snakes, &gradient), "evolved_snakes2");
135 }
136
137 // Convert from snakes back to polylines.
138 int i = -1;
139 for (std::vector<QPointF>& polyline : polylines) {
140 ++i;
141 const Snake& snake = snakes[i];
142 polyline.clear();
143 for (const SnakeNode& node : snake.nodes) {
144 polyline.push_back(node.center);
145 }
146 }
147 } // TextLineRefiner::refine
148
calcBlurredGradient(Grid<float> & gradient,float h_sigma,float v_sigma) const149 void TextLineRefiner::calcBlurredGradient(Grid<float>& gradient, float h_sigma, float v_sigma) const {
150 using namespace boost::lambda;
151
152 const float downscale = 1.0f / (255.0f * 8.0f);
153 Grid<float> vert_grad(m_image.width(), m_image.height(), /*padding=*/0);
154 horizontalSobel<float>(m_image.width(), m_image.height(), m_image.data(), m_image.stride(), _1 * downscale,
155 gradient.data(), gradient.stride(), _1 = _2, _1, gradient.data(), gradient.stride(), _1 = _2);
156 verticalSobel<float>(m_image.width(), m_image.height(), m_image.data(), m_image.stride(), _1 * downscale,
157 vert_grad.data(), vert_grad.stride(), _1 = _2, _1, gradient.data(), gradient.stride(),
158 _1 = _1 * m_unitDownVec[0] + _2 * m_unitDownVec[1]);
159 Grid<float>().swap(vert_grad); // Save memory.
160 gaussBlurGeneric(m_image.size(), h_sigma, v_sigma, gradient.data(), gradient.stride(), _1, gradient.data(),
161 gradient.stride(), _1 = _2);
162 }
163
externalEnergyAt(const Grid<float> & gradient,const Vec2f & pos,float penalty_if_outside)164 float TextLineRefiner::externalEnergyAt(const Grid<float>& gradient, const Vec2f& pos, float penalty_if_outside) {
165 const auto x_base = static_cast<const float>(std::floor(pos[0]));
166 const auto y_base = static_cast<const float>(std::floor(pos[1]));
167 const auto x_base_i = (int) x_base;
168 const auto y_base_i = (int) y_base;
169
170 if ((x_base_i < 0) || (y_base_i < 0) || (x_base_i + 1 >= gradient.width()) || (y_base_i + 1 >= gradient.height())) {
171 return penalty_if_outside;
172 }
173
174 const float x = pos[0] - x_base;
175 const float y = pos[1] - y_base;
176 const float x1 = 1.0f - x;
177 const float y1 = 1.0f - y;
178
179 const int stride = gradient.stride();
180 const float* base = gradient.data() + y_base_i * stride + x_base_i;
181
182 return base[0] * x1 * y1 + base[1] * x * y1 + base[stride] * x1 * y + base[stride + 1] * x * y;
183 }
184
makeSnake(const std::vector<QPointF> & polyline,const int iterations)185 TextLineRefiner::Snake TextLineRefiner::makeSnake(const std::vector<QPointF>& polyline, const int iterations) {
186 float total_length = 0;
187
188 const size_t polyline_size = polyline.size();
189 for (size_t i = 1; i < polyline_size; ++i) {
190 total_length += std::sqrt(Vec2f(polyline[i] - polyline[i - 1]).squaredNorm());
191 }
192
193 const auto points_in_snake = static_cast<const int>(total_length / 20);
194 Snake snake;
195 snake.iterationsRemaining = iterations;
196
197 int points_inserted = 0;
198 float base_t = 0;
199 float next_insert_t = 0;
200 for (size_t i = 1; i < polyline_size; ++i) {
201 const Vec2f base(polyline[i - 1]);
202 const Vec2f vec((polyline[i] - base));
203 const auto next_t = static_cast<const float>(base_t + std::sqrt(vec.squaredNorm()));
204
205 while (next_t >= next_insert_t) {
206 const float fraction = (next_insert_t - base_t) / (next_t - base_t);
207 SnakeNode node;
208 node.center = base + fraction * vec;
209 node.ribHalfLength = 4;
210 snake.nodes.push_back(node);
211 ++points_inserted;
212 next_insert_t = total_length * points_inserted / (points_in_snake - 1);
213 }
214
215 base_t = next_t;
216 }
217
218 return snake;
219 } // TextLineRefiner::makeSnake
220
calcFrenetFrames(std::vector<FrenetFrame> & frenet_frames,const Snake & snake,const SnakeLength & snake_length,const Vec2f & unit_down_vec)221 void TextLineRefiner::calcFrenetFrames(std::vector<FrenetFrame>& frenet_frames,
222 const Snake& snake,
223 const SnakeLength& snake_length,
224 const Vec2f& unit_down_vec) {
225 const size_t num_nodes = snake.nodes.size();
226 frenet_frames.resize(num_nodes);
227
228 if (num_nodes == 0) {
229 return;
230 } else if (num_nodes == 1) {
231 frenet_frames[0].unitTangent = Vec2f();
232 frenet_frames[0].unitDownNormal = Vec2f();
233
234 return;
235 }
236
237 // First segment.
238 Vec2f first_segment(snake.nodes[1].center - snake.nodes[0].center);
239 const float first_segment_len = snake_length.arcLengthAt(1);
240 if (first_segment_len > std::numeric_limits<float>::epsilon()) {
241 first_segment /= first_segment_len;
242 frenet_frames.front().unitTangent = first_segment;
243 }
244 // Segments between first and last, exclusive.
245 Vec2f prev_segment(first_segment);
246 for (size_t i = 1; i < num_nodes - 1; ++i) {
247 Vec2f next_segment(snake.nodes[i + 1].center - snake.nodes[i].center);
248 const float next_segment_len = snake_length.lengthFromTo(i, i + 1);
249 if (next_segment_len > std::numeric_limits<float>::epsilon()) {
250 next_segment /= next_segment_len;
251 }
252
253 Vec2f tangent_vec(0.5 * (prev_segment + next_segment));
254 const auto len = static_cast<const float>(std::sqrt(tangent_vec.squaredNorm()));
255 if (len > std::numeric_limits<float>::epsilon()) {
256 tangent_vec /= len;
257 }
258 frenet_frames[i].unitTangent = tangent_vec;
259
260 prev_segment = next_segment;
261 }
262
263 // Last segments.
264 Vec2f last_segment(snake.nodes[num_nodes - 1].center - snake.nodes[num_nodes - 2].center);
265 const float last_segment_len = snake_length.lengthFromTo(num_nodes - 2, num_nodes - 1);
266 if (last_segment_len > std::numeric_limits<float>::epsilon()) {
267 last_segment /= last_segment_len;
268 frenet_frames.back().unitTangent = last_segment;
269 }
270
271 // Calculate normals and make sure they point down.
272 for (FrenetFrame& frame : frenet_frames) {
273 frame.unitDownNormal = Vec2f(frame.unitTangent[1], -frame.unitTangent[0]);
274 if (frame.unitDownNormal.dot(unit_down_vec) < 0) {
275 frame.unitDownNormal = -frame.unitDownNormal;
276 }
277 }
278 } // TextLineRefiner::calcFrenetFrames
279
evolveSnake(Snake & snake,const Grid<float> & gradient,const OnConvergence on_convergence) const280 void TextLineRefiner::evolveSnake(Snake& snake, const Grid<float>& gradient, const OnConvergence on_convergence) const {
281 float factor = 1.0f;
282
283 while (snake.iterationsRemaining > 0) {
284 --snake.iterationsRemaining;
285
286 Optimizer optimizer(snake, m_unitDownVec, factor);
287 bool changed = false;
288 changed |= optimizer.thicknessAdjustment(snake, gradient);
289 changed |= optimizer.tangentMovement(snake, gradient);
290 changed |= optimizer.normalMovement(snake, gradient);
291
292 if (!changed) {
293 // qDebug() << "Converged. Iterations remaining = " << snake.iterationsRemaining;
294 if (on_convergence == ON_CONVERGENCE_STOP) {
295 break;
296 } else {
297 factor *= 0.5f;
298 }
299 }
300 }
301 }
302
visualizeGradient(const Grid<float> & gradient) const303 QImage TextLineRefiner::visualizeGradient(const Grid<float>& gradient) const {
304 const int width = gradient.width();
305 const int height = gradient.height();
306 const int gradient_stride = gradient.stride();
307 // First let's find the maximum and minimum values.
308 float min_value = NumericTraits<float>::max();
309 float max_value = NumericTraits<float>::min();
310
311 const float* gradient_line = gradient.data();
312 for (int y = 0; y < height; ++y) {
313 for (int x = 0; x < width; ++x) {
314 const float value = gradient_line[x];
315 if (value < min_value) {
316 min_value = value;
317 } else if (value > max_value) {
318 max_value = value;
319 }
320 }
321 gradient_line += gradient_stride;
322 }
323
324 float scale = std::max(max_value, -min_value);
325 if (scale > std::numeric_limits<float>::epsilon()) {
326 scale = 255.0f / scale;
327 }
328
329 QImage overlay(width, height, QImage::Format_ARGB32_Premultiplied);
330 auto* overlay_line = (uint32_t*) overlay.bits();
331 const int overlay_stride = overlay.bytesPerLine() / 4;
332
333 gradient_line = gradient.data();
334 for (int y = 0; y < height; ++y) {
335 for (int x = 0; x < width; ++x) {
336 const float value = gradient_line[x] * scale;
337 const int magnitude = qBound(0, static_cast<const int&>(std::round(std::fabs(value))), 255);
338 if (value > 0) {
339 // Red for positive gradients which indicate bottom edges.
340 overlay_line[x] = qRgba(magnitude, 0, 0, magnitude);
341 } else {
342 overlay_line[x] = qRgba(0, 0, magnitude, magnitude);
343 }
344 }
345 gradient_line += gradient_stride;
346 overlay_line += overlay_stride;
347 }
348
349 QImage canvas(m_image.toQImage().convertToFormat(QImage::Format_ARGB32_Premultiplied));
350 QPainter painter(&canvas);
351 painter.drawImage(0, 0, overlay);
352
353 return canvas;
354 } // TextLineRefiner::visualizeGradient
355
visualizeSnakes(const std::vector<Snake> & snakes,const Grid<float> * gradient) const356 QImage TextLineRefiner::visualizeSnakes(const std::vector<Snake>& snakes, const Grid<float>* gradient) const {
357 QImage canvas;
358 if (gradient) {
359 canvas = visualizeGradient(*gradient);
360 } else {
361 canvas = m_image.toQImage().convertToFormat(QImage::Format_ARGB32_Premultiplied);
362 }
363
364 QPainter painter(&canvas);
365 painter.setRenderHint(QPainter::Antialiasing);
366
367 QPen top_pen(QColor(0, 0, 255));
368 top_pen.setWidthF(1.5);
369
370 QPen bottom_pen(QColor(255, 0, 0));
371 bottom_pen.setWidthF(1.5);
372
373 QPen middle_pen(QColor(255, 0, 255));
374 middle_pen.setWidth(static_cast<int>(1.5));
375
376 QBrush knot_brush(QColor(255, 255, 0, 180));
377 painter.setBrush(knot_brush);
378
379 QRectF knot_rect(0, 0, 7, 7);
380 std::vector<FrenetFrame> frenet_frames;
381
382 for (const Snake& snake : snakes) {
383 const SnakeLength snake_length(snake);
384 calcFrenetFrames(frenet_frames, snake, snake_length, m_unitDownVec);
385 QVector<QPointF> top_polyline;
386 QVector<QPointF> middle_polyline;
387 QVector<QPointF> bottom_polyline;
388
389 const size_t num_nodes = snake.nodes.size();
390 for (size_t i = 0; i < num_nodes; ++i) {
391 const QPointF mid(snake.nodes[i].center + QPointF(0.5, 0.5));
392 const QPointF top(mid - snake.nodes[i].ribHalfLength * frenet_frames[i].unitDownNormal);
393 const QPointF bottom(mid + snake.nodes[i].ribHalfLength * frenet_frames[i].unitDownNormal);
394 top_polyline << top;
395 middle_polyline << mid;
396 bottom_polyline << bottom;
397 }
398
399 // Draw polylines.
400 painter.setPen(top_pen);
401 painter.drawPolyline(top_polyline);
402
403 painter.setPen(bottom_pen);
404 painter.drawPolyline(bottom_polyline);
405
406 painter.setPen(middle_pen);
407 painter.drawPolyline(middle_polyline);
408
409 // Draw knots.
410 painter.setPen(Qt::NoPen);
411 for (const QPointF& pt : middle_polyline) {
412 knot_rect.moveCenter(pt);
413 painter.drawEllipse(knot_rect);
414 }
415 }
416
417 return canvas;
418 } // TextLineRefiner::visualizeSnakes
419
420 /*============================ SnakeLength =============================*/
421
SnakeLength(const Snake & snake)422 TextLineRefiner::SnakeLength::SnakeLength(const Snake& snake)
423 : m_integralLength(snake.nodes.size()), m_totalLength(), m_reciprocalTotalLength(), m_avgSegmentLength() {
424 const size_t num_nodes = snake.nodes.size();
425 float arc_length_accum = 0;
426 for (size_t i = 1; i < num_nodes; ++i) {
427 const Vec2f vec(snake.nodes[i].center - snake.nodes[i - 1].center);
428 arc_length_accum += std::sqrt(vec.squaredNorm());
429 m_integralLength[i] = arc_length_accum;
430 }
431 m_totalLength = arc_length_accum;
432 if (m_totalLength > std::numeric_limits<float>::epsilon()) {
433 m_reciprocalTotalLength = 1.0f / m_totalLength;
434 }
435 if (num_nodes > 1) {
436 m_avgSegmentLength = m_totalLength / (num_nodes - 1);
437 }
438 }
439
440 /*=========================== Optimizer =============================*/
441
442 const float TextLineRefiner::Optimizer::m_elasticityWeight = 0.2f;
443 const float TextLineRefiner::Optimizer::m_bendingWeight = 1.8f;
444 const float TextLineRefiner::Optimizer::m_topExternalWeight = 1.0f;
445 const float TextLineRefiner::Optimizer::m_bottomExternalWeight = 1.0f;
446
Optimizer(const Snake & snake,const Vec2f & unit_down_vec,float factor)447 TextLineRefiner::Optimizer::Optimizer(const Snake& snake, const Vec2f& unit_down_vec, float factor)
448 : m_factor(factor), m_snakeLength(snake) {
449 calcFrenetFrames(m_frenetFrames, snake, m_snakeLength, unit_down_vec);
450 }
451
thicknessAdjustment(Snake & snake,const Grid<float> & gradient)452 bool TextLineRefiner::Optimizer::thicknessAdjustment(Snake& snake, const Grid<float>& gradient) {
453 const size_t num_nodes = snake.nodes.size();
454
455 const float rib_adjustments[] = {0.0f * m_factor, 0.5f * m_factor, -0.5f * m_factor};
456 enum { NUM_RIB_ADJUSTMENTS = sizeof(rib_adjustments) / sizeof(rib_adjustments[0]) };
457
458 int best_i = 0;
459 int best_j = 0;
460 float best_cost = NumericTraits<float>::max();
461 for (int i = 0; i < NUM_RIB_ADJUSTMENTS; ++i) {
462 const float head_rib = snake.nodes.front().ribHalfLength + rib_adjustments[i];
463 if (head_rib <= std::numeric_limits<float>::epsilon()) {
464 continue;
465 }
466
467 for (int j = 0; j < NUM_RIB_ADJUSTMENTS; ++j) {
468 const float tail_rib = snake.nodes.back().ribHalfLength + rib_adjustments[j];
469 if (tail_rib <= std::numeric_limits<float>::epsilon()) {
470 continue;
471 }
472
473 float cost = 0;
474 for (size_t node_idx = 0; node_idx < num_nodes; ++node_idx) {
475 const float t = m_snakeLength.arcLengthFractionAt(node_idx);
476 const float rib = head_rib + t * (tail_rib - head_rib);
477 const Vec2f down_normal(m_frenetFrames[node_idx].unitDownNormal);
478
479 SnakeNode node(snake.nodes[node_idx]);
480 node.ribHalfLength = rib;
481 cost += calcExternalEnergy(gradient, node, down_normal);
482 }
483 if (cost < best_cost) {
484 best_cost = cost;
485 best_i = i;
486 best_j = j;
487 }
488 }
489 }
490 const float head_rib = snake.nodes.front().ribHalfLength + rib_adjustments[best_i];
491 const float tail_rib = snake.nodes.back().ribHalfLength + rib_adjustments[best_j];
492 for (size_t node_idx = 0; node_idx < num_nodes; ++node_idx) {
493 const float t = m_snakeLength.arcLengthFractionAt(node_idx);
494 snake.nodes[node_idx].ribHalfLength = head_rib + t * (tail_rib - head_rib);
495 // Note that we need to recalculate inner ribs even if outer ribs
496 // didn't change, as movement of ribs in tangent direction affects
497 // interpolation.
498 }
499
500 return rib_adjustments[best_i] != 0 || rib_adjustments[best_j] != 0;
501 } // TextLineRefiner::Optimizer::thicknessAdjustment
502
tangentMovement(Snake & snake,const Grid<float> & gradient)503 bool TextLineRefiner::Optimizer::tangentMovement(Snake& snake, const Grid<float>& gradient) {
504 const size_t num_nodes = snake.nodes.size();
505 if (num_nodes < 3) {
506 return false;
507 }
508
509 const float tangent_movements[] = {0.0f * m_factor, 1.0f * m_factor, -1.0f * m_factor};
510 enum { NUM_TANGENT_MOVEMENTS = sizeof(tangent_movements) / sizeof(tangent_movements[0]) };
511
512 std::vector<uint32_t> paths;
513 std::vector<uint32_t> new_paths;
514 std::vector<Step> step_storage;
515 // Note that we don't move the first and the last node in tangent direction.
516 paths.push_back(static_cast<unsigned int&&>(step_storage.size()));
517 step_storage.emplace_back();
518 step_storage.back().prevStepIdx = ~uint32_t(0);
519 step_storage.back().node = snake.nodes.front();
520 step_storage.back().pathCost = 0;
521
522 for (size_t node_idx = 1; node_idx < num_nodes - 1; ++node_idx) {
523 const Vec2f initial_pos(snake.nodes[node_idx].center);
524 const float rib = snake.nodes[node_idx].ribHalfLength;
525 const Vec2f unit_tangent(m_frenetFrames[node_idx].unitTangent);
526 const Vec2f down_normal(m_frenetFrames[node_idx].unitDownNormal);
527
528 for (float tangent_movement : tangent_movements) {
529 Step step;
530 step.prevStepIdx = ~uint32_t(0);
531 step.node.center = initial_pos + tangent_movement * unit_tangent;
532 step.node.ribHalfLength = rib;
533 step.pathCost = NumericTraits<float>::max();
534
535 float base_cost = calcExternalEnergy(gradient, step.node, down_normal);
536
537 if (node_idx == num_nodes - 2) {
538 // Take into account the distance to the last node as well.
539 base_cost += calcElasticityEnergy(step.node, snake.nodes.back(), m_snakeLength.avgSegmentLength());
540 }
541
542 // Now find the best step for the previous node to combine with.
543 for (uint32_t prev_step_idx : paths) {
544 const Step& prev_step = step_storage[prev_step_idx];
545 const float cost = base_cost + prev_step.pathCost
546 + calcElasticityEnergy(step.node, prev_step.node, m_snakeLength.avgSegmentLength());
547
548 if (cost < step.pathCost) {
549 step.pathCost = cost;
550 step.prevStepIdx = prev_step_idx;
551 }
552 }
553 assert(step.prevStepIdx != ~uint32_t(0));
554 new_paths.push_back(static_cast<unsigned int&&>(step_storage.size()));
555 step_storage.push_back(step);
556 }
557 assert(!new_paths.empty());
558 paths.swap(new_paths);
559 new_paths.clear();
560 }
561
562 // Find the best overall path.
563 uint32_t best_path_idx = ~uint32_t(0);
564 float best_cost = NumericTraits<float>::max();
565 for (uint32_t last_step_idx : paths) {
566 const Step& step = step_storage[last_step_idx];
567 if (step.pathCost < best_cost) {
568 best_cost = step.pathCost;
569 best_path_idx = last_step_idx;
570 }
571 }
572 // Having found the best path, convert it back to a snake.
573 float max_sqdist = 0;
574 uint32_t step_idx = best_path_idx;
575 for (auto node_idx = static_cast<int>(num_nodes - 2); node_idx > 0; --node_idx) {
576 assert(step_idx != ~uint32_t(0));
577 const Step& step = step_storage[step_idx];
578 SnakeNode& node = snake.nodes[node_idx];
579
580 const float sqdist = (node.center - step.node.center).squaredNorm();
581 max_sqdist = std::max<float>(max_sqdist, sqdist);
582
583 node = step.node;
584 step_idx = step.prevStepIdx;
585 }
586
587 return max_sqdist > std::numeric_limits<float>::epsilon();
588 } // TextLineRefiner::Optimizer::tangentMovement
589
normalMovement(Snake & snake,const Grid<float> & gradient)590 bool TextLineRefiner::Optimizer::normalMovement(Snake& snake, const Grid<float>& gradient) {
591 const size_t num_nodes = snake.nodes.size();
592 if (num_nodes < 3) {
593 return false;
594 }
595
596 const float normal_movements[] = {0.0f * m_factor, 1.0f * m_factor, -1.0f * m_factor};
597 enum { NUM_NORMAL_MOVEMENTS = sizeof(normal_movements) / sizeof(normal_movements[0]) };
598
599 std::vector<uint32_t> paths;
600 std::vector<uint32_t> new_paths;
601 std::vector<Step> step_storage;
602 // The first two nodes pose a problem for us. These nodes don't have two predecessors,
603 // and therefore we can't take bending into the account. We could take the followers
604 // instead of the ancestors, but then this follower is going to move itself, making
605 // our calculations less accurate. The proper solution is to provide not N but N*N
606 // paths to the 3rd node, each path corresponding to a combination of movement of
607 // the first and the second node. That's the approach we are taking here.
608 for (float normal_movement : normal_movements) {
609 const auto prev_step_idx = static_cast<const uint32_t>(step_storage.size());
610 {
611 // Movements of the first node.
612 const Vec2f down_normal(m_frenetFrames[0].unitDownNormal);
613 Step step;
614 step.node.center = snake.nodes[0].center + normal_movement * down_normal;
615 step.node.ribHalfLength = snake.nodes[0].ribHalfLength;
616 step.prevStepIdx = ~uint32_t(0);
617 step.pathCost = calcExternalEnergy(gradient, step.node, down_normal);
618
619 step_storage.push_back(step);
620 }
621
622 for (float j : normal_movements) {
623 // Movements of the second node.
624 const Vec2f down_normal(m_frenetFrames[1].unitDownNormal);
625
626 Step step;
627 step.node.center = snake.nodes[1].center + j * down_normal;
628 step.node.ribHalfLength = snake.nodes[1].ribHalfLength;
629 step.prevStepIdx = prev_step_idx;
630 step.pathCost = step_storage[prev_step_idx].pathCost + calcExternalEnergy(gradient, step.node, down_normal);
631
632 paths.push_back(static_cast<unsigned int&&>(step_storage.size()));
633 step_storage.push_back(step);
634 }
635 }
636
637 for (size_t node_idx = 2; node_idx < num_nodes; ++node_idx) {
638 const SnakeNode& node = snake.nodes[node_idx];
639 const Vec2f down_normal(m_frenetFrames[node_idx].unitDownNormal);
640
641 for (float normal_movement : normal_movements) {
642 Step step;
643 step.prevStepIdx = ~uint32_t(0);
644 step.node.center = node.center + normal_movement * down_normal;
645 step.node.ribHalfLength = node.ribHalfLength;
646 step.pathCost = NumericTraits<float>::max();
647
648 const float base_cost = calcExternalEnergy(gradient, step.node, down_normal);
649
650 // Now find the best step for the previous node to combine with.
651 for (uint32_t prev_step_idx : paths) {
652 const Step& prev_step = step_storage[prev_step_idx];
653 const Step& prev_prev_step = step_storage[prev_step.prevStepIdx];
654
655 const float cost
656 = base_cost + prev_step.pathCost + calcBendingEnergy(step.node, prev_step.node, prev_prev_step.node);
657
658 if (cost < step.pathCost) {
659 step.pathCost = cost;
660 step.prevStepIdx = prev_step_idx;
661 }
662 }
663 assert(step.prevStepIdx != ~uint32_t(0));
664 new_paths.push_back(static_cast<unsigned int&&>(step_storage.size()));
665 step_storage.push_back(step);
666 }
667 assert(!new_paths.empty());
668 paths.swap(new_paths);
669 new_paths.clear();
670 }
671
672 // Find the best overall path.
673 uint32_t best_path_idx = ~uint32_t(0);
674 float best_cost = NumericTraits<float>::max();
675 for (uint32_t last_step_idx : paths) {
676 const Step& step = step_storage[last_step_idx];
677 if (step.pathCost < best_cost) {
678 best_cost = step.pathCost;
679 best_path_idx = last_step_idx;
680 }
681 }
682 // Having found the best path, convert it back to a snake.
683 float max_sqdist = 0;
684 uint32_t step_idx = best_path_idx;
685 for (auto node_idx = static_cast<int>(num_nodes - 1); node_idx >= 0; --node_idx) {
686 assert(step_idx != ~uint32_t(0));
687 const Step& step = step_storage[step_idx];
688 SnakeNode& node = snake.nodes[node_idx];
689
690 const float sqdist = (node.center - step.node.center).squaredNorm();
691 max_sqdist = std::max<float>(max_sqdist, sqdist);
692
693 node = step.node;
694 step_idx = step.prevStepIdx;
695 }
696
697 return max_sqdist > std::numeric_limits<float>::epsilon();
698 } // TextLineRefiner::Optimizer::normalMovement
699
calcExternalEnergy(const Grid<float> & gradient,const SnakeNode & node,const Vec2f down_normal)700 float TextLineRefiner::Optimizer::calcExternalEnergy(const Grid<float>& gradient,
701 const SnakeNode& node,
702 const Vec2f down_normal) {
703 const Vec2f top(node.center - node.ribHalfLength * down_normal);
704 const Vec2f bottom(node.center + node.ribHalfLength * down_normal);
705
706 const float top_grad = externalEnergyAt(gradient, top, 0.0f);
707 const float bottom_grad = externalEnergyAt(gradient, bottom, 0.0f);
708
709 // Surprisingly, it turns out it's a bad idea to penalize for the opposite
710 // sign in the gradient. Sometimes a snake's edge has to move over the
711 // "wrong" gradient ridge before it gets into a good position.
712 // Those std::min and std::max prevent such penalties.
713 const float top_energy = m_topExternalWeight * std::min<float>(top_grad, 0.0f);
714 const float bottom_energy = m_bottomExternalWeight * std::max<float>(bottom_grad, 0.0f);
715
716 // Positive gradient indicates the bottom edge and vice versa.
717 // Note that negative energies are fine with us - the less the better.
718 return top_energy - bottom_energy;
719 }
720
calcElasticityEnergy(const SnakeNode & node1,const SnakeNode & node2,float avg_dist)721 float TextLineRefiner::Optimizer::calcElasticityEnergy(const SnakeNode& node1, const SnakeNode& node2, float avg_dist) {
722 const Vec2f vec(node1.center - node2.center);
723 const auto vec_len = static_cast<const float>(std::sqrt(vec.squaredNorm()));
724
725 if (vec_len < 1.0f) {
726 return 1000.0f; // Penalty for moving too close to another node.
727 }
728
729 const auto dist_diff = std::fabs(avg_dist - vec_len);
730
731 return m_elasticityWeight * (dist_diff / avg_dist);
732 }
733
calcBendingEnergy(const SnakeNode & node,const SnakeNode & prev_node,const SnakeNode & prev_prev_node)734 float TextLineRefiner::Optimizer::calcBendingEnergy(const SnakeNode& node,
735 const SnakeNode& prev_node,
736 const SnakeNode& prev_prev_node) {
737 const Vec2f vec(node.center - prev_node.center);
738 const auto vec_len = static_cast<const float>(std::sqrt(vec.squaredNorm()));
739
740 if (vec_len < 1.0f) {
741 return 1000.0f; // Penalty for moving too close to another node.
742 }
743
744 const Vec2f prev_vec(prev_node.center - prev_prev_node.center);
745 const auto prev_vec_len = static_cast<const float>(std::sqrt(prev_vec.squaredNorm()));
746 if (prev_vec_len < 1.0f) {
747 return 1000.0f; // Penalty for moving too close to another node.
748 }
749
750 const Vec2f bend_vec(vec / vec_len - prev_vec / prev_vec_len);
751
752 return m_bendingWeight * bend_vec.squaredNorm();
753 }
754 } // namespace dewarping