1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 #ifdef HAVE_CONFIG_H
15 #  include "config_auto.h"
16 #endif
17 
18 #include <allheaders.h>
19 #include "bbgrid.h"  // Base class.
20 #include "blobbox.h" // BlobNeighourDir.
21 #include "blobs.h"
22 #include "colpartition.h"
23 #include "helpers.h" // for IntCastRounded
24 #include "normalis.h"
25 #include "textlineprojection.h"
26 
27 #include <algorithm>
28 
// Padding factor applied to blobs whose orientation is definite.
constexpr int kOrientedPadFactor = 8;
// Padding factor applied to blobs whose orientation is not definite.
constexpr int kDefaultPadFactor = 2;
// Cost multiplier for each step that moves away from the line center.
constexpr int kWrongWayPenalty = 4;
// Ratio between parallel gap and perpendicular gap when measuring the total
// distance of a box from a target box in curved textline space. The
// parallel gap is divided by this factor, i.e. treated more favorably, so
// that quotes and ellipsis at the end of textlines can be caught.
constexpr int kParaPerpDistRatio = 4;
// The inter-line gap must be at least this multiple of scale_factor_ before
// the increment box is padded perpendicular to the text line.
constexpr int kMinLineSpacingFactor = 4;
// Maximum tab-stop overrun for horizontal padding, in projection pixels.
constexpr int kMaxTabStopOverrun = 6;
45 
46 namespace tesseract {
47 
TextlineProjection(int resolution)48 TextlineProjection::TextlineProjection(int resolution) : x_origin_(0), y_origin_(0), pix_(nullptr) {
49   // The projection map should be about 100 ppi, whatever the input.
50   scale_factor_ = IntCastRounded(resolution / 100.0);
51   if (scale_factor_ < 1) {
52     scale_factor_ = 1;
53   }
54 }
~TextlineProjection()55 TextlineProjection::~TextlineProjection() {
56   pix_.destroy();
57 }
58 
59 // Build the projection profile given the input_block containing lists of
60 // blobs, a rotation to convert to image coords,
61 // and a full-resolution nontext_map, marking out areas to avoid.
62 // During construction, we have the following assumptions:
63 // The rotation is a multiple of 90 degrees, ie no deskew yet.
64 // The blobs have had their left and right rules set to also limit
65 // the range of projection.
ConstructProjection(TO_BLOCK * input_block,const FCOORD & rotation,Image nontext_map)66 void TextlineProjection::ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation,
67                                              Image nontext_map) {
68   pix_.destroy();
69   TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map));
70   x_origin_ = 0;
71   y_origin_ = image_box.height();
72   int width = (image_box.width() + scale_factor_ - 1) / scale_factor_;
73   int height = (image_box.height() + scale_factor_ - 1) / scale_factor_;
74 
75   pix_ = pixCreate(width, height, 8);
76   ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map);
77   ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map);
78   Image final_pix = pixBlockconv(pix_, 1, 1);
79   //  Pix* final_pix = pixBlockconv(pix_, 2, 2);
80   pix_.destroy();
81   pix_ = final_pix;
82 }
83 
84 #ifndef GRAPHICS_DISABLED
85 
86 // Display the blobs in the window colored according to textline quality.
PlotGradedBlobs(BLOBNBOX_LIST * blobs,ScrollView * win)87 void TextlineProjection::PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win) {
88   BLOBNBOX_IT it(blobs);
89   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
90     BLOBNBOX *blob = it.data();
91     const TBOX &box = blob->bounding_box();
92     bool bad_box = BoxOutOfHTextline(box, nullptr, false);
93     if (blob->UniquelyVertical()) {
94       win->Pen(ScrollView::YELLOW);
95     } else {
96       win->Pen(bad_box ? ScrollView::RED : ScrollView::BLUE);
97     }
98     win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
99   }
100   win->Update();
101 }
102 
103 #endif // !GRAPHICS_DISABLED
104 
105 // Moves blobs that look like they don't sit well on a textline from the
106 // input blobs list to the output small_blobs list.
107 // This gets them away from initial textline finding to stop diacritics
108 // from forming incorrect textlines. (Introduced mainly to fix Thai.)
MoveNonTextlineBlobs(BLOBNBOX_LIST * blobs,BLOBNBOX_LIST * small_blobs) const109 void TextlineProjection::MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs,
110                                               BLOBNBOX_LIST *small_blobs) const {
111   BLOBNBOX_IT it(blobs);
112   BLOBNBOX_IT small_it(small_blobs);
113   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
114     BLOBNBOX *blob = it.data();
115     const TBOX &box = blob->bounding_box();
116     bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
117     if (BoxOutOfHTextline(box, nullptr, debug) && !blob->UniquelyVertical()) {
118       blob->ClearNeighbours();
119       small_it.add_to_end(it.extract());
120     }
121   }
122 }
123 
124 #ifndef GRAPHICS_DISABLED
125 
126 // Create a window and display the projection in it.
DisplayProjection() const127 void TextlineProjection::DisplayProjection() const {
128   int width = pixGetWidth(pix_);
129   int height = pixGetHeight(pix_);
130   Image pixc = pixCreate(width, height, 32);
131   int src_wpl = pixGetWpl(pix_);
132   int col_wpl = pixGetWpl(pixc);
133   uint32_t *src_data = pixGetData(pix_);
134   uint32_t *col_data = pixGetData(pixc);
135   for (int y = 0; y < height; ++y, src_data += src_wpl, col_data += col_wpl) {
136     for (int x = 0; x < width; ++x) {
137       int pixel = GET_DATA_BYTE(src_data, x);
138       l_uint32 result;
139       if (pixel <= 17) {
140         composeRGBPixel(0, 0, pixel * 15, &result);
141       } else if (pixel <= 145) {
142         composeRGBPixel(0, (pixel - 17) * 2, 255, &result);
143       } else {
144         composeRGBPixel((pixel - 145) * 2, 255, 255, &result);
145       }
146       col_data[x] = result;
147     }
148   }
149   auto *win = new ScrollView("Projection", 0, 0, width, height, width, height);
150   win->Draw(pixc, 0, 0);
151   win->Update();
152   pixc.destroy();
153 }
154 
155 #endif // !GRAPHICS_DISABLED
156 
157 // Compute the distance of the box from the partition using curved projection
158 // space. As DistanceOfBoxFromBox, except that the direction is taken from
159 // the ColPartition and the median bounds of the ColPartition are used as
160 // the to_box.
DistanceOfBoxFromPartition(const TBOX & box,const ColPartition & part,const DENORM * denorm,bool debug) const161 int TextlineProjection::DistanceOfBoxFromPartition(const TBOX &box, const ColPartition &part,
162                                                    const DENORM *denorm, bool debug) const {
163   // Compute a partition box that uses the median top/bottom of the blobs
164   // within and median left/right for vertical.
165   TBOX part_box = part.bounding_box();
166   if (part.IsHorizontalType()) {
167     part_box.set_top(part.median_top());
168     part_box.set_bottom(part.median_bottom());
169   } else {
170     part_box.set_left(part.median_left());
171     part_box.set_right(part.median_right());
172   }
173   // Now use DistanceOfBoxFromBox to make the actual calculation.
174   return DistanceOfBoxFromBox(box, part_box, part.IsHorizontalType(), denorm, debug);
175 }
176 
177 // Compute the distance from the from_box to the to_box using curved
178 // projection space. Separation that involves a decrease in projection
179 // density (moving from the from_box to the to_box) is weighted more heavily
180 // than constant density, and an increase is weighted less.
181 // If horizontal_textline is true, then curved space is used vertically,
182 // as for a diacritic on the edge of a textline.
183 // The projection uses original image coords, so denorm is used to get
184 // back to the image coords from box/part space.
185 // How the calculation works: Think of a diacritic near a textline.
186 // Distance is measured from the far side of the from_box to the near side of
187 // the to_box. Shown is the horizontal textline case.
188 //          |------^-----|
189 //          | from | box |
190 //          |------|-----|
191 //   perpendicular |
192 //          <------v-------->|--------------------|
193 //                  parallel |     to box         |
194 //                           |--------------------|
195 // Perpendicular distance uses "curved space" See VerticalDistance below.
196 // Parallel distance is linear.
197 // Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio.
DistanceOfBoxFromBox(const TBOX & from_box,const TBOX & to_box,bool horizontal_textline,const DENORM * denorm,bool debug) const198 int TextlineProjection::DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box,
199                                              bool horizontal_textline, const DENORM *denorm,
200                                              bool debug) const {
201   // The parallel_gap is the horizontal gap between a horizontal textline and
202   // the box. Analogous for vertical.
203   int parallel_gap = 0;
204   // start_pt is the box end of the line to be modified for curved space.
205   TPOINT start_pt;
206   // end_pt is the partition end of the line to be modified for curved space.
207   TPOINT end_pt;
208   if (horizontal_textline) {
209     parallel_gap = from_box.x_gap(to_box) + from_box.width();
210     start_pt.x = (from_box.left() + from_box.right()) / 2;
211     end_pt.x = start_pt.x;
212     if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) {
213       start_pt.y = from_box.top();
214       end_pt.y = std::min(to_box.top(), start_pt.y);
215     } else {
216       start_pt.y = from_box.bottom();
217       end_pt.y = std::max(to_box.bottom(), start_pt.y);
218     }
219   } else {
220     parallel_gap = from_box.y_gap(to_box) + from_box.height();
221     if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) {
222       start_pt.x = from_box.right();
223       end_pt.x = std::min(to_box.right(), start_pt.x);
224     } else {
225       start_pt.x = from_box.left();
226       end_pt.x = std::max(to_box.left(), start_pt.x);
227     }
228     start_pt.y = (from_box.bottom() + from_box.top()) / 2;
229     end_pt.y = start_pt.y;
230   }
231   // The perpendicular gap is the max vertical distance gap out of:
232   // top of from_box to to_box top and bottom of from_box to to_box bottom.
233   // This value is then modified for curved projection space.
234   // Analogous for vertical.
235   int perpendicular_gap = 0;
236   // If start_pt == end_pt, then the from_box lies entirely within the to_box
237   // (in the perpendicular direction), so we don't need to calculate the
238   // perpendicular_gap.
239   if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) {
240     if (denorm != nullptr) {
241       // Denormalize the start and end.
242       denorm->DenormTransform(nullptr, start_pt, &start_pt);
243       denorm->DenormTransform(nullptr, end_pt, &end_pt);
244     }
245     if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) {
246       perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y, end_pt.y);
247     } else {
248       perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x, start_pt.y);
249     }
250   }
251   // The parallel_gap weighs less than the perpendicular_gap.
252   return perpendicular_gap + parallel_gap / kParaPerpDistRatio;
253 }
254 
255 // Compute the distance between (x, y1) and (x, y2) using the rule that
256 // a decrease in textline density is weighted more heavily than an increase.
257 // The coordinates are in source image space, ie processed by any denorm
258 // already, but not yet scaled by scale_factor_.
259 // Going from the outside of a textline to the inside should measure much
260 // less distance than going from the inside of a textline to the outside.
261 // How it works:
262 // An increase is cheap (getting closer to a textline).
263 // Constant costs unity.
264 // A decrease is expensive (getting further from a textline).
265 // Pixels in projection map Counted distance
266 //              2
267 //              3              1/x
268 //              3               1
269 //              2               x
270 //              5              1/x
271 //              7              1/x
272 // Total: 1 + x + 3/x where x = kWrongWayPenalty.
VerticalDistance(bool debug,int x,int y1,int y2) const273 int TextlineProjection::VerticalDistance(bool debug, int x, int y1, int y2) const {
274   x = ImageXToProjectionX(x);
275   y1 = ImageYToProjectionY(y1);
276   y2 = ImageYToProjectionY(y2);
277   if (y1 == y2) {
278     return 0;
279   }
280   int wpl = pixGetWpl(pix_);
281   int step = y1 < y2 ? 1 : -1;
282   uint32_t *data = pixGetData(pix_) + y1 * wpl;
283   wpl *= step;
284   int prev_pixel = GET_DATA_BYTE(data, x);
285   int distance = 0;
286   int right_way_steps = 0;
287   for (int y = y1; y != y2; y += step) {
288     data += wpl;
289     int pixel = GET_DATA_BYTE(data, x);
290     if (debug) {
291       tprintf("At (%d,%d), pix = %d, prev=%d\n", x, y + step, pixel, prev_pixel);
292     }
293     if (pixel < prev_pixel) {
294       distance += kWrongWayPenalty;
295     } else if (pixel > prev_pixel) {
296       ++right_way_steps;
297     } else {
298       ++distance;
299     }
300     prev_pixel = pixel;
301   }
302   return distance * scale_factor_ + right_way_steps * scale_factor_ / kWrongWayPenalty;
303 }
304 
305 // Compute the distance between (x1, y) and (x2, y) using the rule that
306 // a decrease in textline density is weighted more heavily than an increase.
HorizontalDistance(bool debug,int x1,int x2,int y) const307 int TextlineProjection::HorizontalDistance(bool debug, int x1, int x2, int y) const {
308   x1 = ImageXToProjectionX(x1);
309   x2 = ImageXToProjectionX(x2);
310   y = ImageYToProjectionY(y);
311   if (x1 == x2) {
312     return 0;
313   }
314   int wpl = pixGetWpl(pix_);
315   int step = x1 < x2 ? 1 : -1;
316   uint32_t *data = pixGetData(pix_) + y * wpl;
317   int prev_pixel = GET_DATA_BYTE(data, x1);
318   int distance = 0;
319   int right_way_steps = 0;
320   for (int x = x1; x != x2; x += step) {
321     int pixel = GET_DATA_BYTE(data, x + step);
322     if (debug) {
323       tprintf("At (%d,%d), pix = %d, prev=%d\n", x + step, y, pixel, prev_pixel);
324     }
325     if (pixel < prev_pixel) {
326       distance += kWrongWayPenalty;
327     } else if (pixel > prev_pixel) {
328       ++right_way_steps;
329     } else {
330       ++distance;
331     }
332     prev_pixel = pixel;
333   }
334   return distance * scale_factor_ + right_way_steps * scale_factor_ / kWrongWayPenalty;
335 }
336 
337 // Returns true if the blob appears to be outside of a textline.
338 // Such blobs are potentially diacritics (even if large in Thai) and should
339 // be kept away from initial textline finding.
BoxOutOfHTextline(const TBOX & box,const DENORM * denorm,bool debug) const340 bool TextlineProjection::BoxOutOfHTextline(const TBOX &box, const DENORM *denorm,
341                                            bool debug) const {
342   int grad1 = 0;
343   int grad2 = 0;
344   EvaluateBoxInternal(box, denorm, debug, &grad1, &grad2, nullptr, nullptr);
345   int worst_result = std::min(grad1, grad2);
346   int total_result = grad1 + grad2;
347   if (total_result >= 6) {
348     return false; // Strongly in textline.
349   }
350   // Medium strength: if either gradient is negative, it is likely outside
351   // the body of the textline.
352   if (worst_result < 0) {
353     return true;
354   }
355   return false;
356 }
357 
358 // Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
359 // but uses the median top/bottom for horizontal and median left/right for
360 // vertical instead of the bounding box edges.
361 // Evaluates for both horizontal and vertical and returns the best result,
362 // with a positive value for horizontal and a negative value for vertical.
EvaluateColPartition(const ColPartition & part,const DENORM * denorm,bool debug) const363 int TextlineProjection::EvaluateColPartition(const ColPartition &part, const DENORM *denorm,
364                                              bool debug) const {
365   if (part.IsSingleton()) {
366     return EvaluateBox(part.bounding_box(), denorm, debug);
367   }
368   // Test vertical orientation.
369   TBOX box = part.bounding_box();
370   // Use the partition median for left/right.
371   box.set_left(part.median_left());
372   box.set_right(part.median_right());
373   int vresult = EvaluateBox(box, denorm, debug);
374 
375   // Test horizontal orientation.
376   box = part.bounding_box();
377   // Use the partition median for top/bottom.
378   box.set_top(part.median_top());
379   box.set_bottom(part.median_bottom());
380   int hresult = EvaluateBox(box, denorm, debug);
381   if (debug) {
382     tprintf("Partition hresult=%d, vresult=%d from:", hresult, vresult);
383     part.bounding_box().print();
384     part.Print();
385   }
386   return hresult >= -vresult ? hresult : vresult;
387 }
388 
389 // Computes the mean projection gradients over the horizontal and vertical
390 // edges of the box:
391 //   -h-h-h-h-h-h
392 //  |------------| mean=htop   -v|+v--------+v|-v
393 //  |+h+h+h+h+h+h|             -v|+v        +v|-v
394 //  |            |             -v|+v        +v|-v
395 //  |    box     |             -v|+v  box   +v|-v
396 //  |            |             -v|+v        +v|-v
397 //  |+h+h+h+h+h+h|             -v|+v        +v|-v
398 //  |------------| mean=hbot   -v|+v--------+v|-v
399 //   -h-h-h-h-h-h
400 //                           mean=vleft  mean=vright
401 //
402 // Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
403 // for a horizontal textline, a negative number for a vertical textline,
404 // and near zero for undecided. Undecided is most likely non-text.
405 // All the gradients are truncated to remain non-negative, since negative
406 // horizontal gradients don't give any indication of being vertical and
407 // vice versa.
// Additional complexity: The coordinates have to be transformed to original
// image coordinates with denorm (if not null) and scaled to match the
// projection pix. Only THEN does the code step out 2 pixels each way from the
// edge to compute the gradient. It tries 3 positions, each measuring the
// gradient over a 4-pixel spread: (+3/-1), (+2/-2), (+1/-3). This complexity
// is handled by several layers of helpers below.
EvaluateBox(const TBOX & box,const DENORM * denorm,bool debug) const414 int TextlineProjection::EvaluateBox(const TBOX &box, const DENORM *denorm, bool debug) const {
415   return EvaluateBoxInternal(box, denorm, debug, nullptr, nullptr, nullptr, nullptr);
416 }
417 
418 // Internal version of EvaluateBox returns the unclipped gradients as well
419 // as the result of EvaluateBox.
420 // hgrad1 and hgrad2 are the gradients for the horizontal textline.
EvaluateBoxInternal(const TBOX & box,const DENORM * denorm,bool debug,int * hgrad1,int * hgrad2,int * vgrad1,int * vgrad2) const421 int TextlineProjection::EvaluateBoxInternal(const TBOX &box, const DENORM *denorm, bool debug,
422                                             int *hgrad1, int *hgrad2, int *vgrad1,
423                                             int *vgrad2) const {
424   int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(), box.top(), true);
425   int bottom_gradient =
426       -BestMeanGradientInRow(denorm, box.left(), box.right(), box.bottom(), false);
427   int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(), box.top(), true);
428   int right_gradient =
429       -BestMeanGradientInColumn(denorm, box.right(), box.bottom(), box.top(), false);
430   int top_clipped = std::max(top_gradient, 0);
431   int bottom_clipped = std::max(bottom_gradient, 0);
432   int left_clipped = std::max(left_gradient, 0);
433   int right_clipped = std::max(right_gradient, 0);
434   if (debug) {
435     tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:", top_gradient,
436             bottom_gradient, left_gradient, right_gradient);
437     box.print();
438   }
439   int result = std::max(top_clipped, bottom_clipped) - std::max(left_clipped, right_clipped);
440   if (hgrad1 != nullptr && hgrad2 != nullptr) {
441     *hgrad1 = top_gradient;
442     *hgrad2 = bottom_gradient;
443   }
444   if (vgrad1 != nullptr && vgrad2 != nullptr) {
445     *vgrad1 = left_gradient;
446     *vgrad2 = right_gradient;
447   }
448   return result;
449 }
450 
451 // Helper returns the mean gradient value for the horizontal row at the given
452 // y, (in the external coordinates) by subtracting the mean of the transformed
453 // row 2 pixels above from the mean of the transformed row 2 pixels below.
454 // This gives a positive value for a good top edge and negative for bottom.
455 // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
BestMeanGradientInRow(const DENORM * denorm,int16_t min_x,int16_t max_x,int16_t y,bool best_is_max) const456 int TextlineProjection::BestMeanGradientInRow(const DENORM *denorm, int16_t min_x, int16_t max_x,
457                                               int16_t y, bool best_is_max) const {
458   TPOINT start_pt(min_x, y);
459   TPOINT end_pt(max_x, y);
460   int upper = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
461   int lower = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
462   int best_gradient = lower - upper;
463   upper = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
464   lower = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
465   int gradient = lower - upper;
466   if ((gradient > best_gradient) == best_is_max) {
467     best_gradient = gradient;
468   }
469   upper = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
470   lower = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
471   gradient = lower - upper;
472   if ((gradient > best_gradient) == best_is_max) {
473     best_gradient = gradient;
474   }
475   return best_gradient;
476 }
477 
478 // Helper returns the mean gradient value for the vertical column at the
479 // given x, (in the external coordinates) by subtracting the mean of the
480 // transformed column 2 pixels left from the mean of the transformed column
481 // 2 pixels to the right.
482 // This gives a positive value for a good left edge and negative for right.
483 // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
BestMeanGradientInColumn(const DENORM * denorm,int16_t x,int16_t min_y,int16_t max_y,bool best_is_max) const484 int TextlineProjection::BestMeanGradientInColumn(const DENORM *denorm, int16_t x, int16_t min_y,
485                                                  int16_t max_y, bool best_is_max) const {
486   TPOINT start_pt(x, min_y);
487   TPOINT end_pt(x, max_y);
488   int left = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
489   int right = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
490   int best_gradient = right - left;
491   left = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
492   right = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
493   int gradient = right - left;
494   if ((gradient > best_gradient) == best_is_max) {
495     best_gradient = gradient;
496   }
497   left = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
498   right = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
499   gradient = right - left;
500   if ((gradient > best_gradient) == best_is_max) {
501     best_gradient = gradient;
502   }
503   return best_gradient;
504 }
505 
506 // Helper returns the mean pixel value over the line between the start_pt and
507 // end_pt (inclusive), but shifted perpendicular to the line in the projection
508 // image by offset pixels. For simplicity, it is assumed that the vector is
509 // either nearly horizontal or nearly vertical. It works on skewed textlines!
510 // The end points are in external coordinates, and will be denormalized with
511 // the denorm if not nullptr before further conversion to pix coordinates.
512 // After all the conversions, the offset is added to the direction
513 // perpendicular to the line direction. The offset is thus in projection image
514 // coordinates, which allows the caller to get a guaranteed displacement
515 // between pixels used to calculate gradients.
MeanPixelsInLineSegment(const DENORM * denorm,int offset,TPOINT start_pt,TPOINT end_pt) const516 int TextlineProjection::MeanPixelsInLineSegment(const DENORM *denorm, int offset, TPOINT start_pt,
517                                                 TPOINT end_pt) const {
518   TransformToPixCoords(denorm, &start_pt);
519   TransformToPixCoords(denorm, &end_pt);
520   TruncateToImageBounds(&start_pt);
521   TruncateToImageBounds(&end_pt);
522   int wpl = pixGetWpl(pix_);
523   uint32_t *data = pixGetData(pix_);
524   int total = 0;
525   int count = 0;
526   int x_delta = end_pt.x - start_pt.x;
527   int y_delta = end_pt.y - start_pt.y;
528   if (abs(x_delta) >= abs(y_delta)) {
529     if (x_delta == 0) {
530       return 0;
531     }
532     // Horizontal line. Add the offset vertically.
533     int x_step = x_delta > 0 ? 1 : -1;
534     // Correct offset for rotation, keeping it anti-clockwise of the delta.
535     offset *= x_step;
536     start_pt.y += offset;
537     end_pt.y += offset;
538     TruncateToImageBounds(&start_pt);
539     TruncateToImageBounds(&end_pt);
540     x_delta = end_pt.x - start_pt.x;
541     y_delta = end_pt.y - start_pt.y;
542     count = x_delta * x_step + 1;
543     for (int x = start_pt.x; x != end_pt.x; x += x_step) {
544       int y = start_pt.y + DivRounded(y_delta * (x - start_pt.x), x_delta);
545       total += GET_DATA_BYTE(data + wpl * y, x);
546     }
547   } else {
548     // Vertical line. Add the offset horizontally.
549     int y_step = y_delta > 0 ? 1 : -1;
550     // Correct offset for rotation, keeping it anti-clockwise of the delta.
551     // Pix holds the image with y=0 at the top, so the offset is negated.
552     offset *= -y_step;
553     start_pt.x += offset;
554     end_pt.x += offset;
555     TruncateToImageBounds(&start_pt);
556     TruncateToImageBounds(&end_pt);
557     x_delta = end_pt.x - start_pt.x;
558     y_delta = end_pt.y - start_pt.y;
559     count = y_delta * y_step + 1;
560     for (int y = start_pt.y; y != end_pt.y; y += y_step) {
561       int x = start_pt.x + DivRounded(x_delta * (y - start_pt.y), y_delta);
562       total += GET_DATA_BYTE(data + wpl * y, x);
563     }
564   }
565   return DivRounded(total, count);
566 }
567 
568 // Given an input pix, and a box, the sides of the box are shrunk inwards until
569 // they bound any black pixels found within the original box.
570 // The function converts between tesseract coords and the pix coords assuming
571 // that this pix is full resolution equal in size to the original image.
572 // Returns an empty box if there are no black pixels in the source box.
BoundsWithinBox(Image pix,const TBOX & box)573 static TBOX BoundsWithinBox(Image pix, const TBOX &box) {
574   int im_height = pixGetHeight(pix);
575   Box *input_box = boxCreate(box.left(), im_height - box.top(), box.width(), box.height());
576   Box *output_box = nullptr;
577   pixClipBoxToForeground(pix, input_box, nullptr, &output_box);
578   TBOX result_box;
579   if (output_box != nullptr) {
580     l_int32 x, y, width, height;
581     boxGetGeometry(output_box, &x, &y, &width, &height);
582     result_box.set_left(x);
583     result_box.set_right(x + width);
584     result_box.set_top(im_height - y);
585     result_box.set_bottom(result_box.top() - height);
586     boxDestroy(&output_box);
587   }
588   boxDestroy(&input_box);
589   return result_box;
590 }
591 
592 // Splits the given box in half at x_middle or y_middle according to split_on_x
593 // and checks for nontext_map pixels in each half. Reduces the bbox so that it
594 // still includes the middle point, but does not touch any fg pixels in
595 // nontext_map. An empty box may be returned if there is no such box.
TruncateBoxToMissNonText(int x_middle,int y_middle,bool split_on_x,Image nontext_map,TBOX * bbox)596 static void TruncateBoxToMissNonText(int x_middle, int y_middle, bool split_on_x, Image nontext_map,
597                                      TBOX *bbox) {
598   TBOX box1(*bbox);
599   TBOX box2(*bbox);
600   TBOX im_box;
601   if (split_on_x) {
602     box1.set_right(x_middle);
603     im_box = BoundsWithinBox(nontext_map, box1);
604     if (!im_box.null_box()) {
605       box1.set_left(im_box.right());
606     }
607     box2.set_left(x_middle);
608     im_box = BoundsWithinBox(nontext_map, box2);
609     if (!im_box.null_box()) {
610       box2.set_right(im_box.left());
611     }
612   } else {
613     box1.set_bottom(y_middle);
614     im_box = BoundsWithinBox(nontext_map, box1);
615     if (!im_box.null_box()) {
616       box1.set_top(im_box.bottom());
617     }
618     box2.set_top(y_middle);
619     im_box = BoundsWithinBox(nontext_map, box2);
620     if (!im_box.null_box()) {
621       box2.set_bottom(im_box.top());
622     }
623   }
624   box1 += box2;
625   *bbox = box1;
626 }
627 
628 // Helper function to add 1 to a rectangle in source image coords to the
629 // internal projection pix_.
IncrementRectangle8Bit(const TBOX & box)630 void TextlineProjection::IncrementRectangle8Bit(const TBOX &box) {
631   int scaled_left = ImageXToProjectionX(box.left());
632   int scaled_top = ImageYToProjectionY(box.top());
633   int scaled_right = ImageXToProjectionX(box.right());
634   int scaled_bottom = ImageYToProjectionY(box.bottom());
635   int wpl = pixGetWpl(pix_);
636   uint32_t *data = pixGetData(pix_) + scaled_top * wpl;
637   for (int y = scaled_top; y <= scaled_bottom; ++y) {
638     for (int x = scaled_left; x <= scaled_right; ++x) {
639       int pixel = GET_DATA_BYTE(data, x);
640       if (pixel < 255) {
641         SET_DATA_BYTE(data, x, pixel + 1);
642       }
643     }
644     data += wpl;
645   }
646 }
647 
648 // Inserts a list of blobs into the projection.
649 // Rotation is a multiple of 90 degrees to get from blob coords to
650 // nontext_map coords, nontext_map_box is the bounds of the nontext_map.
651 // Blobs are spread horizontally or vertically according to their internal
652 // flags, but the spreading is truncated by set pixels in the nontext_map
653 // and also by the horizontal rule line limits on the blobs.
ProjectBlobs(BLOBNBOX_LIST * blobs,const FCOORD & rotation,const TBOX & nontext_map_box,Image nontext_map)654 void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST *blobs, const FCOORD &rotation,
655                                       const TBOX &nontext_map_box, Image nontext_map) {
656   BLOBNBOX_IT blob_it(blobs);
657   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
658     BLOBNBOX *blob = blob_it.data();
659     TBOX bbox = blob->bounding_box();
660     ICOORD middle((bbox.left() + bbox.right()) / 2, (bbox.bottom() + bbox.top()) / 2);
661     bool spreading_horizontally = PadBlobBox(blob, &bbox);
662     // Rotate to match the nontext_map.
663     bbox.rotate(rotation);
664     middle.rotate(rotation);
665     if (rotation.x() == 0.0f) {
666       spreading_horizontally = !spreading_horizontally;
667     }
668     // Clip to the image before applying the increments.
669     bbox &= nontext_map_box; // This is in-place box intersection.
670     // Check for image pixels before spreading.
671     TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally, nontext_map, &bbox);
672     if (bbox.area() > 0) {
673       IncrementRectangle8Bit(bbox);
674     }
675   }
676 }
677 
678 // Pads the bounding box of the given blob according to whether it is on
679 // a horizontal or vertical text line, taking into account tab-stops near
680 // the blob. Returns true if padding was in the horizontal direction.
PadBlobBox(BLOBNBOX * blob,TBOX * bbox)681 bool TextlineProjection::PadBlobBox(BLOBNBOX *blob, TBOX *bbox) {
682   // Determine which direction to spread.
683   // If text is well spaced out, it can be useful to pad perpendicular to
684   // the textline direction, so as to ensure diacritics get absorbed
685   // correctly, but if the text is tightly spaced, this will destroy the
686   // blank space between textlines in the projection map, and that would
687   // be very bad.
688   int pad_limit = scale_factor_ * kMinLineSpacingFactor;
689   int xpad = 0;
690   int ypad = 0;
691   bool padding_horizontally = false;
692   if (blob->UniquelyHorizontal()) {
693     xpad = bbox->height() * kOrientedPadFactor;
694     padding_horizontally = true;
695     // If the text appears to be very well spaced, pad the other direction by a
696     // single pixel in the projection profile space to help join diacritics to
697     // the textline.
698     if ((blob->neighbour(BND_ABOVE) == nullptr ||
699          bbox->y_gap(blob->neighbour(BND_ABOVE)->bounding_box()) > pad_limit) &&
700         (blob->neighbour(BND_BELOW) == nullptr ||
701          bbox->y_gap(blob->neighbour(BND_BELOW)->bounding_box()) > pad_limit)) {
702       ypad = scale_factor_;
703     }
704   } else if (blob->UniquelyVertical()) {
705     ypad = bbox->width() * kOrientedPadFactor;
706     if ((blob->neighbour(BND_LEFT) == nullptr ||
707          bbox->x_gap(blob->neighbour(BND_LEFT)->bounding_box()) > pad_limit) &&
708         (blob->neighbour(BND_RIGHT) == nullptr ||
709          bbox->x_gap(blob->neighbour(BND_RIGHT)->bounding_box()) > pad_limit)) {
710       xpad = scale_factor_;
711     }
712   } else {
713     if ((blob->neighbour(BND_ABOVE) != nullptr &&
714          blob->neighbour(BND_ABOVE)->neighbour(BND_BELOW) == blob) ||
715         (blob->neighbour(BND_BELOW) != nullptr &&
716          blob->neighbour(BND_BELOW)->neighbour(BND_ABOVE) == blob)) {
717       ypad = bbox->width() * kDefaultPadFactor;
718     }
719     if ((blob->neighbour(BND_RIGHT) != nullptr &&
720          blob->neighbour(BND_RIGHT)->neighbour(BND_LEFT) == blob) ||
721         (blob->neighbour(BND_LEFT) != nullptr &&
722          blob->neighbour(BND_LEFT)->neighbour(BND_RIGHT) == blob)) {
723       xpad = bbox->height() * kDefaultPadFactor;
724       padding_horizontally = true;
725     }
726   }
727   bbox->pad(xpad, ypad);
728   pad_limit = scale_factor_ * kMaxTabStopOverrun;
729   // Now shrink horizontally to avoid stepping more than pad_limit over a
730   // tab-stop.
731   if (bbox->left() < blob->left_rule() - pad_limit) {
732     bbox->set_left(blob->left_rule() - pad_limit);
733   }
734   if (bbox->right() > blob->right_rule() + pad_limit) {
735     bbox->set_right(blob->right_rule() + pad_limit);
736   }
737   return padding_horizontally;
738 }
739 
740 // Helper denormalizes the TPOINT with the denorm if not nullptr, then
741 // converts to pix_ coordinates.
TransformToPixCoords(const DENORM * denorm,TPOINT * pt) const742 void TextlineProjection::TransformToPixCoords(const DENORM *denorm, TPOINT *pt) const {
743   if (denorm != nullptr) {
744     // Denormalize the point.
745     denorm->DenormTransform(nullptr, *pt, pt);
746   }
747   pt->x = ImageXToProjectionX(pt->x);
748   pt->y = ImageYToProjectionY(pt->y);
749 }
750 
751 #if defined(_MSC_VER) && !defined(__clang__)
752 #  pragma optimize("g", off)
753 #endif // _MSC_VER
754 // Helper truncates the TPOINT to be within the pix_.
TruncateToImageBounds(TPOINT * pt) const755 void TextlineProjection::TruncateToImageBounds(TPOINT *pt) const {
756   pt->x = ClipToRange<int>(pt->x, 0, pixGetWidth(pix_) - 1);
757   pt->y = ClipToRange<int>(pt->y, 0, pixGetHeight(pix_) - 1);
758 }
759 #if defined(_MSC_VER) && !defined(__clang__)
760 #  pragma optimize("", on)
761 #endif // _MSC_VER
762 
763 // Transform tesseract image coordinates to coordinates used in the projection.
ImageXToProjectionX(int x) const764 int TextlineProjection::ImageXToProjectionX(int x) const {
765   x = ClipToRange((x - x_origin_) / scale_factor_, 0, pixGetWidth(pix_) - 1);
766   return x;
767 }
ImageYToProjectionY(int y) const768 int TextlineProjection::ImageYToProjectionY(int y) const {
769   y = ClipToRange((y_origin_ - y) / scale_factor_, 0, pixGetHeight(pix_) - 1);
770   return y;
771 }
772 
773 } // namespace tesseract.
774