1 ///////////////////////////////////////////////////////////////////////
2 // File: cjkpitch.cpp
3 // Description: Code to determine fixed pitchness and the pitch if fixed,
4 // for CJK text.
5 // Author: takenaka@google.com (Hiroshi Takenaka)
6 //
7 // Copyright 2011 Google Inc. All Rights Reserved.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19
20 #include "cjkpitch.h"
21 #include "topitch.h"
22 #include "tovars.h"
23
24 #include <algorithm> // for std::sort
25 #include <cmath>
26 #include <vector> // for std::vector
27
28 namespace tesseract {
29
30 static BOOL_VAR(textord_space_size_is_variable, false,
31 "If true, word delimiter spaces are assumed to have "
32 "variable width, even though characters have fixed pitch.");
33
34 // Allow +/-10% error for character pitch / body size.
35 static const float kFPTolerance = 0.1f;
36
37 // Minimum ratio of "good" character pitch for a row to be considered
38 // to be fixed-pitch.
39 static const float kFixedPitchThreshold = 0.35f;
40
41 // rank statistics for a small collection of float values.
42 class SimpleStats {
43 public:
44 SimpleStats() = default;
45 ~SimpleStats() = default;
46
Clear()47 void Clear() {
48 values_.clear();
49 finalized_ = false;
50 }
51
Add(float value)52 void Add(float value) {
53 values_.push_back(value);
54 finalized_ = false;
55 }
56
Finish()57 void Finish() {
58 std::sort(values_.begin(), values_.end());
59 finalized_ = true;
60 }
61
ile(double frac)62 float ile(double frac) {
63 if (!finalized_) {
64 Finish();
65 }
66 if (values_.empty()) {
67 return 0.0f;
68 }
69 if (frac >= 1.0) {
70 return values_.back();
71 }
72 if (frac <= 0.0 || values_.size() == 1) {
73 return values_[0];
74 }
75 int index = static_cast<int>((values_.size() - 1) * frac);
76 float reminder = (values_.size() - 1) * frac - index;
77
78 return values_[index] * (1.0f - reminder) + values_[index + 1] * reminder;
79 }
80
median()81 float median() {
82 return ile(0.5);
83 }
84
minimum()85 float minimum() {
86 if (!finalized_) {
87 Finish();
88 }
89 if (values_.empty()) {
90 return 0.0f;
91 }
92 return values_[0];
93 }
94
empty() const95 bool empty() const {
96 return values_.empty();
97 }
98
size() const99 int size() const {
100 return values_.size();
101 }
102
103 private:
104 bool finalized_ = false;
105 std::vector<float> values_;
106 };
107
108 // statistics for a small collection of float pairs (x, y).
109 // EstimateYFor(x, r) returns the estimated y at x, based on
110 // existing samples between x*(1-r) ~ x*(1+r).
111 class LocalCorrelation {
112 public:
113 struct float_pair {
114 float x, y;
115 int vote;
116 };
117
LocalCorrelation()118 LocalCorrelation() : finalized_(false) {}
119 ~LocalCorrelation() = default;
120
Finish()121 void Finish() {
122 std::sort(values_.begin(), values_.end(), float_pair_compare);
123 finalized_ = true;
124 }
125
Clear()126 void Clear() {
127 finalized_ = false;
128 }
129
Add(float x,float y,int v)130 void Add(float x, float y, int v) {
131 struct float_pair value;
132 value.x = x;
133 value.y = y;
134 value.vote = v;
135 values_.push_back(value);
136 finalized_ = false;
137 }
138
EstimateYFor(float x,float r)139 float EstimateYFor(float x, float r) {
140 ASSERT_HOST(finalized_);
141 unsigned start = 0, end = values_.size();
142 // Because the number of samples (used_) is assumed to be small,
143 // just use linear search to find values within the range.
144 while (start < values_.size() && values_[start].x < x * (1 - r)) {
145 start++;
146 }
147 while (end > 0 && values_[end - 1].x > x * (1 + r)) {
148 end--;
149 }
150
151 // Fall back to the global average if there are no data within r
152 // of x.
153 if (start >= end) {
154 start = 0;
155 end = values_.size();
156 }
157
158 // Compute weighted average of the values.
159 float rc = 0;
160 int vote = 0;
161 for (auto i = start; i < end; i++) {
162 rc += values_[i].vote * x * values_[i].y / values_[i].x;
163 vote += values_[i].vote;
164 }
165
166 return vote == 0 ? 0.0f : rc / vote;
167 }
168
169 private:
float_pair_compare(const float_pair f_a,const float_pair f_b)170 static bool float_pair_compare(const float_pair f_a, const float_pair f_b) {
171 return f_a.x < f_b.x;
172 }
173
174 bool finalized_;
175 std::vector<struct float_pair> values_;
176 };
177
178 // Class to represent a character on a fixed pitch row. A FPChar may
179 // consist of multiple blobs (BLOBNBOX's).
180 class FPChar {
181 public:
182 enum Alignment { ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD };
183
FPChar()184 FPChar()
185 : box_()
186 , real_body_()
187 , from_(nullptr)
188 , to_(nullptr)
189 , num_blobs_(0)
190 , max_gap_(0)
191 , final_(false)
192 , alignment_(ALIGN_UNKNOWN)
193 , merge_to_prev_(false)
194 , delete_flag_(false) {}
195
196 // Initialize from blob.
Init(BLOBNBOX * blob)197 void Init(BLOBNBOX *blob) {
198 box_ = blob->bounding_box();
199 real_body_ = box_;
200 from_ = to_ = blob;
201 num_blobs_ = 1;
202 }
203
204 // Merge this character with "next". The "next" character should
205 // consist of succeeding blobs on the same row.
Merge(const FPChar & next)206 void Merge(const FPChar &next) {
207 int gap = real_body_.x_gap(next.real_body_);
208 if (gap > max_gap_) {
209 max_gap_ = gap;
210 }
211
212 box_ += next.box_;
213 real_body_ += next.real_body_;
214 to_ = next.to_;
215 num_blobs_ += next.num_blobs_;
216 }
217
218 // Accessors.
box() const219 const TBOX &box() const {
220 return box_;
221 }
set_box(const TBOX & box)222 void set_box(const TBOX &box) {
223 box_ = box;
224 }
real_body() const225 const TBOX &real_body() const {
226 return real_body_;
227 }
228
is_final() const229 bool is_final() const {
230 return final_;
231 }
set_final(bool flag)232 void set_final(bool flag) {
233 final_ = flag;
234 }
235
alignment() const236 const Alignment &alignment() const {
237 return alignment_;
238 }
set_alignment(Alignment alignment)239 void set_alignment(Alignment alignment) {
240 alignment_ = alignment;
241 }
242
merge_to_prev() const243 bool merge_to_prev() const {
244 return merge_to_prev_;
245 }
set_merge_to_prev(bool flag)246 void set_merge_to_prev(bool flag) {
247 merge_to_prev_ = flag;
248 }
249
delete_flag() const250 bool delete_flag() const {
251 return delete_flag_;
252 }
set_delete_flag(bool flag)253 void set_delete_flag(bool flag) {
254 delete_flag_ = flag;
255 }
256
max_gap() const257 int max_gap() const {
258 return max_gap_;
259 }
260
num_blobs() const261 int num_blobs() const {
262 return num_blobs_;
263 }
264
265 private:
266 TBOX box_; // Rectangle region considered to be occupied by this
267 // character. It could be bigger than the bounding box.
268 TBOX real_body_; // Real bounding box of this character.
269 BLOBNBOX *from_; // The first blob of this character.
270 BLOBNBOX *to_; // The last blob of this character.
271 int num_blobs_; // Number of blobs that belong to this character.
272 int max_gap_; // Maximum x gap between the blobs.
273
274 bool final_; // True if alignment/fragmentation decision for this
275 // character is finalized.
276
277 Alignment alignment_; // Alignment status.
278 bool merge_to_prev_; // True if this is a fragmented blob that
279 // needs to be merged to the previous
280 // character.
281
282 int delete_flag_; // True if this character is merged to another
283 // one and needs to be deleted.
284 };
285
286 // Class to represent a fixed pitch row, as a linear collection of
287 // FPChar's.
288 class FPRow {
289 public:
FPRow()290 FPRow() : all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(), heights_(), characters_() {}
291
292 ~FPRow() = default;
293
294 // Initialize from TD_ROW.
295 void Init(TO_ROW *row);
296
297 // Estimate character pitch of this row, based on current alignment
298 // status of underlying FPChar's. The argument pass1 can be set to
299 // true if the function is called after Pass1Analyze(), to eliminate
300 // some redundant computation.
301 void EstimatePitch(bool pass1);
302
303 // Check each character if it has good character pitches between its
304 // predecessor and its successor and set its alignment status. If
305 // we already calculated the estimated pitch for this row, the value
306 // is used. If we didn't, a character is considered to be good, if
307 // the pitches between its predecessor and its successor are almost
308 // equal.
309 void Pass1Analyze();
310
311 // Find characters that fit nicely into one imaginary body next to a
312 // character which is already finalized. Then mark them as character
313 // fragments.
314 bool Pass2Analyze();
315
316 // Merge FPChars marked as character fragments into one.
317 void MergeFragments();
318
319 // Finalize characters that are already large enough and cannot be
320 // merged with others any more.
321 void FinalizeLargeChars();
322
323 // Output pitch estimation results to attributes of TD_ROW.
324 void OutputEstimations();
325
326 void DebugOutputResult(int row_index);
327
good_pitches()328 int good_pitches() {
329 return good_pitches_.size();
330 }
331
pitch()332 float pitch() {
333 return pitch_;
334 }
335
estimated_pitch()336 float estimated_pitch() {
337 return estimated_pitch_;
338 }
339
set_estimated_pitch(float v)340 void set_estimated_pitch(float v) {
341 estimated_pitch_ = v;
342 }
343
height()344 float height() {
345 return height_;
346 }
347
height_pitch_ratio()348 float height_pitch_ratio() {
349 if (good_pitches_.size() < 2) {
350 return -1.0;
351 }
352 return height_ / good_pitches_.median();
353 }
354
gap()355 float gap() {
356 return gap_;
357 }
358
num_chars()359 size_t num_chars() {
360 return characters_.size();
361 }
character(int i)362 FPChar *character(int i) {
363 return &characters_[i];
364 }
365
box(int i)366 const TBOX &box(int i) {
367 return characters_[i].box();
368 }
369
real_body(int i)370 const TBOX &real_body(int i) {
371 return characters_[i].real_body();
372 }
373
is_box_modified(int i)374 bool is_box_modified(int i) {
375 return !(characters_[i].box() == characters_[i].real_body());
376 }
377
center_x(int i)378 float center_x(int i) {
379 return (characters_[i].box().left() + characters_[i].box().right()) / 2.0;
380 }
381
is_final(int i)382 bool is_final(int i) {
383 return characters_[i].is_final();
384 }
385
finalize(int i)386 void finalize(int i) {
387 characters_[i].set_final(true);
388 }
389
is_good(int i)390 bool is_good(int i) {
391 return characters_[i].alignment() == FPChar::ALIGN_GOOD;
392 }
393
mark_good(int i)394 void mark_good(int i) {
395 characters_[i].set_alignment(FPChar::ALIGN_GOOD);
396 }
397
mark_bad(int i)398 void mark_bad(int i) {
399 characters_[i].set_alignment(FPChar::ALIGN_BAD);
400 }
401
clear_alignment(int i)402 void clear_alignment(int i) {
403 characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN);
404 }
405
406 private:
x_overlap_fraction(const TBOX & box1,const TBOX & box2)407 static float x_overlap_fraction(const TBOX &box1, const TBOX &box2) {
408 if (std::min(box1.width(), box2.width()) == 0) {
409 return 0.0;
410 }
411 return -box1.x_gap(box2) / static_cast<float>(std::min(box1.width(), box2.width()));
412 }
413
mostly_overlap(const TBOX & box1,const TBOX & box2)414 static bool mostly_overlap(const TBOX &box1, const TBOX &box2) {
415 return x_overlap_fraction(box1, box2) > 0.9;
416 }
417
significant_overlap(const TBOX & box1,const TBOX & box2)418 static bool significant_overlap(const TBOX &box1, const TBOX &box2) {
419 if (std::min(box1.width(), box2.width()) == 0) {
420 return false;
421 }
422 int overlap = -box1.x_gap(box2);
423 return overlap > 1 || x_overlap_fraction(box1, box2) > 0.1;
424 }
425
box_pitch(const TBOX & ref,const TBOX & box)426 static float box_pitch(const TBOX &ref, const TBOX &box) {
427 return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0;
428 }
429
430 // Check if two neighboring characters satisfy the fixed pitch model.
is_good_pitch(float pitch,const TBOX & box1,const TBOX & box2)431 static bool is_good_pitch(float pitch, const TBOX &box1, const TBOX &box2) {
432 // Character box shouldn't exceed pitch.
433 if (box1.width() >= pitch * (1.0 + kFPTolerance) ||
434 box2.width() >= pitch * (1.0 + kFPTolerance) ||
435 box1.height() >= pitch * (1.0 + kFPTolerance) ||
436 box2.height() >= pitch * (1.0 + kFPTolerance)) {
437 return false;
438 }
439
440 const float real_pitch = box_pitch(box1, box2);
441 if (std::fabs(real_pitch - pitch) < pitch * kFPTolerance) {
442 return true;
443 }
444
445 if (textord_space_size_is_variable) {
446 // Hangul characters usually have fixed pitch, but words are
447 // delimited by space which can be narrower than characters.
448 if (real_pitch > pitch && real_pitch < pitch * 2.0 && real_pitch - box1.x_gap(box2) < pitch) {
449 return true;
450 }
451 }
452 return false;
453 }
454
is_interesting_blob(const BLOBNBOX * blob)455 static bool is_interesting_blob(const BLOBNBOX *blob) {
456 return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER;
457 }
458
459 // Cleanup chars that are already merged to others.
DeleteChars()460 void DeleteChars() {
461 unsigned index = 0;
462 for (unsigned i = 0; i < characters_.size(); ++i) {
463 if (!characters_[i].delete_flag()) {
464 if (index != i) {
465 characters_[index] = characters_[i];
466 }
467 index++;
468 }
469 }
470 characters_.resize(index);
471 }
472
473 float pitch_ = 0.0f; // Character pitch.
474 float estimated_pitch_ = 0.0f; // equal to pitch_ if pitch_ is considered
475 // to be good enough.
476 float height_ = 0.0f; // Character height.
477 float gap_ = 0.0f; // Minimum gap between characters.
478
479 // Pitches between any two successive characters.
480 SimpleStats all_pitches_;
481 // Gaps between any two successive characters.
482 SimpleStats all_gaps_;
483 // Pitches between any two successive characters that are consistent
484 // with the fixed pitch model.
485 SimpleStats good_pitches_;
486 // Gaps between any two successive characters that are consistent
487 // with the fixed pitch model.
488 SimpleStats good_gaps_;
489
490 SimpleStats heights_;
491
492 std::vector<FPChar> characters_;
493 TO_ROW *real_row_ = nullptr; // Underlying TD_ROW for this row.
494 };
495
Init(TO_ROW * row)496 void FPRow::Init(TO_ROW *row) {
497 ASSERT_HOST(row != nullptr);
498 ASSERT_HOST(row->xheight > 0);
499 real_row_ = row;
500 real_row_->pitch_decision = PITCH_CORR_PROP; // Default decision.
501
502 BLOBNBOX_IT blob_it = row->blob_list();
503 // Initialize characters_ and compute the initial estimation of
504 // character height.
505 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
506 if (is_interesting_blob(blob_it.data())) {
507 FPChar fp_char;
508 fp_char.Init(blob_it.data());
509 // Merge unconditionally if two blobs overlap.
510 if (!characters_.empty() && significant_overlap(fp_char.box(), characters_.back().box())) {
511 characters_.back().Merge(fp_char);
512 } else {
513 characters_.push_back(fp_char);
514 }
515 TBOX bound = blob_it.data()->bounding_box();
516 if (bound.height() * 3.0 > bound.width()) {
517 heights_.Add(bound.height());
518 }
519 }
520 }
521 heights_.Finish();
522 height_ = heights_.ile(0.875);
523 }
524
OutputEstimations()525 void FPRow::OutputEstimations() {
526 if (good_pitches_.empty()) {
527 pitch_ = 0.0f;
528 real_row_->pitch_decision = PITCH_CORR_PROP;
529 return;
530 }
531
532 pitch_ = good_pitches_.median();
533 real_row_->fixed_pitch = pitch_;
534 // good_gaps_.ile(0.125) can be large if most characters on the row
535 // are skinny. Use pitch_ - height_ instead if it's smaller, but
536 // positive.
537 real_row_->kern_size = real_row_->pr_nonsp =
538 std::min(good_gaps_.ile(0.125), std::max(pitch_ - height_, 0.0f));
539 real_row_->body_size = pitch_ - real_row_->kern_size;
540
541 if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) {
542 // If more than half of the characters of a line don't fit to the
543 // fixed pitch model, consider the line to be proportional. 50%
544 // seems to be a good threshold in practice as well.
545 // Anyway we store estimated values (fixed_pitch, kern_size, etc.) in
546 // real_row_ as a partial estimation result and try to use them in the
547 // normalization process.
548 real_row_->pitch_decision = PITCH_CORR_PROP;
549 return;
550 } else if (good_pitches_.size() > all_pitches_.size() * 0.75) {
551 real_row_->pitch_decision = PITCH_DEF_FIXED;
552 } else {
553 real_row_->pitch_decision = PITCH_CORR_FIXED;
554 }
555
556 real_row_->space_size = real_row_->pr_space = pitch_;
557 // Set min_space to 50% of character pitch so that we can break CJK
558 // text at a half-width space after punctuation.
559 real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5;
560
561 // Don't consider a quarter space as a real space, because it's used
562 // for line justification in traditional Japanese books.
563 real_row_->max_nonspace =
564 std::max(pitch_ * 0.25 + good_gaps_.minimum(), static_cast<double>(good_gaps_.ile(0.875)));
565
566 int space_threshold = std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
567 static_cast<int>(real_row_->xheight));
568
569 // Make max_nonspace larger than any intra-character gap so that
570 // make_prop_words() won't break a row at the middle of a character.
571 for (size_t i = 0; i < num_chars(); ++i) {
572 if (characters_[i].max_gap() > real_row_->max_nonspace) {
573 real_row_->max_nonspace = characters_[i].max_gap();
574 }
575 }
576 real_row_->space_threshold = std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
577 static_cast<int>(real_row_->xheight));
578 real_row_->used_dm_model = false;
579
580 // Setup char_cells.
581 ICOORDELT_IT cell_it = &real_row_->char_cells;
582 auto *cell = new ICOORDELT(real_body(0).left(), 0);
583 cell_it.add_after_then_move(cell);
584
585 int right = real_body(0).right();
586 for (size_t i = 1; i < num_chars(); ++i) {
587 // Put a word break if gap between two characters is bigger than
588 // space_threshold. Don't break if none of two characters
589 // couldn't be "finalized", because maybe they need to be merged
590 // to one character.
591 if ((is_final(i - 1) || is_final(i)) &&
592 real_body(i - 1).x_gap(real_body(i)) > space_threshold) {
593 cell = new ICOORDELT(right + 1, 0);
594 cell_it.add_after_then_move(cell);
595 while (right + pitch_ < box(i).left()) {
596 right += pitch_;
597 cell = new ICOORDELT(right + 1, 0);
598 cell_it.add_after_then_move(cell);
599 }
600 right = box(i).left();
601 }
602 cell = new ICOORDELT((right + real_body(i).left()) / 2, 0);
603 cell_it.add_after_then_move(cell);
604 right = real_body(i).right();
605 }
606
607 cell = new ICOORDELT(right + 1, 0);
608 cell_it.add_after_then_move(cell);
609
610 // TODO(takenaka): add code to store alignment/fragmentation
611 // information to blobs so that it can be reused later, e.g. in
612 // recognition phase.
613 }
614
EstimatePitch(bool pass1)615 void FPRow::EstimatePitch(bool pass1) {
616 good_pitches_.Clear();
617 all_pitches_.Clear();
618 good_gaps_.Clear();
619 all_gaps_.Clear();
620 heights_.Clear();
621 if (num_chars() == 0) {
622 return;
623 }
624
625 int32_t cx0, cx1;
626 bool prev_was_good = is_good(0);
627 cx0 = center_x(0);
628
629 heights_.Add(box(0).height());
630 for (size_t i = 1; i < num_chars(); i++) {
631 cx1 = center_x(i);
632 int32_t pitch = cx1 - cx0;
633 int32_t gap = std::max(0, real_body(i - 1).x_gap(real_body(i)));
634
635 heights_.Add(box(i).height());
636 // Ignore if the pitch is too close. But don't ignore wide pitch
637 // may be the result of large tracking.
638 if (pitch > height_ * 0.5) {
639 all_pitches_.Add(pitch);
640 all_gaps_.Add(gap);
641 if (is_good(i)) {
642 // In pass1 (after Pass1Analyze()), all characters marked as
643 // "good" have a good consistent pitch with their previous
644 // characters. However, it's not true in pass2 and a good
645 // character may have a good pitch only between its successor.
646 // So we collect only pitch values between two good
647 // characters. and within tolerance in pass2.
648 if (pass1 ||
649 (prev_was_good && std::fabs(estimated_pitch_ - pitch) < kFPTolerance * estimated_pitch_)) {
650 good_pitches_.Add(pitch);
651 if (!is_box_modified(i - 1) && !is_box_modified(i)) {
652 good_gaps_.Add(gap);
653 }
654 }
655 prev_was_good = true;
656 } else {
657 prev_was_good = false;
658 }
659 }
660 cx0 = cx1;
661 }
662
663 good_pitches_.Finish();
664 all_pitches_.Finish();
665 good_gaps_.Finish();
666 all_gaps_.Finish();
667 heights_.Finish();
668
669 height_ = heights_.ile(0.875);
670 if (all_pitches_.empty()) {
671 pitch_ = 0.0f;
672 gap_ = 0.0f;
673 } else if (good_pitches_.size() < 2) {
674 // We don't have enough data to estimate the pitch of this row yet.
675 // Use median of all pitches as the initial guess.
676 pitch_ = all_pitches_.median();
677 ASSERT_HOST(pitch_ > 0.0f);
678 gap_ = all_gaps_.ile(0.125);
679 } else {
680 pitch_ = good_pitches_.median();
681 ASSERT_HOST(pitch_ > 0.0f);
682 gap_ = good_gaps_.ile(0.125);
683 }
684 }
685
DebugOutputResult(int row_index)686 void FPRow::DebugOutputResult(int row_index) {
687 if (num_chars() > 0) {
688 tprintf(
689 "Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, "
690 "space_size=%f, space_threshold=%d, xheight=%f\n",
691 row_index, static_cast<int>(real_row_->pitch_decision), real_row_->fixed_pitch,
692 real_row_->max_nonspace, real_row_->space_size, real_row_->space_threshold,
693 real_row_->xheight);
694
695 for (unsigned i = 0; i < num_chars(); i++) {
696 tprintf("Char %u: is_final=%d is_good=%d num_blobs=%d: ", i, is_final(i), is_good(i),
697 character(i)->num_blobs());
698 box(i).print();
699 }
700 }
701 }
702
Pass1Analyze()703 void FPRow::Pass1Analyze() {
704 if (num_chars() < 2) {
705 return;
706 }
707
708 if (estimated_pitch_ > 0.0f) {
709 for (size_t i = 2; i < num_chars(); i++) {
710 if (is_good_pitch(estimated_pitch_, box(i - 2), box(i - 1)) &&
711 is_good_pitch(estimated_pitch_, box(i - 1), box(i))) {
712 mark_good(i - 1);
713 }
714 }
715 } else {
716 for (size_t i = 2; i < num_chars(); i++) {
717 if (is_good_pitch(box_pitch(box(i - 2), box(i - 1)), box(i - 1), box(i))) {
718 mark_good(i - 1);
719 }
720 }
721 }
722 character(0)->set_alignment(character(1)->alignment());
723 character(num_chars() - 1)->set_alignment(character(num_chars() - 2)->alignment());
724 }
725
Pass2Analyze()726 bool FPRow::Pass2Analyze() {
727 bool changed = false;
728 if (num_chars() <= 1 || estimated_pitch_ == 0.0f) {
729 return false;
730 }
731 for (size_t i = 0; i < num_chars(); i++) {
732 if (is_final(i)) {
733 continue;
734 }
735
736 FPChar::Alignment alignment = character(i)->alignment();
737 bool intersecting = false;
738 bool not_intersecting = false;
739
740 if (i < num_chars() - 1 && is_final(i + 1)) {
741 // Next character is already finalized. Estimate the imaginary
742 // body including this character based on the character. Skip
743 // whitespace if necessary.
744 bool skipped_whitespaces = false;
745 float c1 = center_x(i + 1) - 1.5 * estimated_pitch_;
746 while (c1 > box(i).right()) {
747 skipped_whitespaces = true;
748 c1 -= estimated_pitch_;
749 }
750 TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top());
751
752 // Collect all characters that mostly fit in the region.
753 // Also, their union height shouldn't be too big.
754 int j = i;
755 TBOX merged;
756 while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) &&
757 merged.bounding_union(box(j)).height() < estimated_pitch_ * (1 + kFPTolerance)) {
758 merged += box(j);
759 j--;
760 }
761
762 if (j >= 0 && significant_overlap(ibody, box(j))) {
763 // character(j) lies on the character boundary and doesn't fit
764 // well into the imaginary body.
765 if (!is_final(j)) {
766 intersecting = true;
767 }
768 } else {
769 not_intersecting = true;
770 if (i - j > 0) {
771 // Merge character(j+1) ... character(i) because they fit
772 // into the body nicely.
773 if (i - j == 1) {
774 // Only one char in the imaginary body.
775 if (!skipped_whitespaces) {
776 mark_good(i);
777 }
778 // set ibody as bounding box of this character to get
779 // better pitch analysis result for halfwidth glyphs
780 // followed by a halfwidth space.
781 if (box(i).width() <= estimated_pitch_ * 0.5) {
782 ibody += box(i);
783 character(i)->set_box(ibody);
784 }
785 character(i)->set_merge_to_prev(false);
786 finalize(i);
787 } else {
788 for (int k = i; k > j + 1; k--) {
789 character(k)->set_merge_to_prev(true);
790 }
791 }
792 }
793 }
794 }
795 if (i > 0 && is_final(i - 1)) {
796 // Now we repeat everything from the opposite side. Previous
797 // character is already finalized. Estimate the imaginary body
798 // including this character based on the character.
799 bool skipped_whitespaces = false;
800 float c1 = center_x(i - 1) + 1.5 * estimated_pitch_;
801 while (c1 < box(i).left()) {
802 skipped_whitespaces = true;
803 c1 += estimated_pitch_;
804 }
805 TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top());
806
807 size_t j = i;
808 TBOX merged;
809 while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) &&
810 merged.bounding_union(box(j)).height() < estimated_pitch_ * (1 + kFPTolerance)) {
811 merged += box(j);
812 j++;
813 }
814
815 if (j < num_chars() && significant_overlap(ibody, box(j))) {
816 if (!is_final(j)) {
817 intersecting = true;
818 }
819 } else {
820 not_intersecting = true;
821 if (j - i > 0) {
822 if (j - i == 1) {
823 if (!skipped_whitespaces) {
824 mark_good(i);
825 }
826 if (box(i).width() <= estimated_pitch_ * 0.5) {
827 ibody += box(i);
828 character(i)->set_box(ibody);
829 }
830 character(i)->set_merge_to_prev(false);
831 finalize(i);
832 } else {
833 for (size_t k = i + 1; k < j; k++) {
834 character(k)->set_merge_to_prev(true);
835 }
836 }
837 }
838 }
839 }
840
841 // This character doesn't fit well into the estimated imaginary
842 // bodies. Mark it as bad.
843 if (intersecting && !not_intersecting) {
844 mark_bad(i);
845 }
846 if (character(i)->alignment() != alignment || character(i)->merge_to_prev()) {
847 changed = true;
848 }
849 }
850
851 return changed;
852 }
853
MergeFragments()854 void FPRow::MergeFragments() {
855 int last_char = 0;
856
857 for (size_t j = 0; j < num_chars(); ++j) {
858 if (character(j)->merge_to_prev()) {
859 character(last_char)->Merge(*character(j));
860 character(j)->set_delete_flag(true);
861 clear_alignment(last_char);
862 character(j - 1)->set_merge_to_prev(false);
863 } else {
864 last_char = j;
865 }
866 }
867 DeleteChars();
868 }
869
FinalizeLargeChars()870 void FPRow::FinalizeLargeChars() {
871 float row_pitch = estimated_pitch();
872 for (size_t i = 0; i < num_chars(); i++) {
873 if (is_final(i)) {
874 continue;
875 }
876
877 // Finalize if both neighbors are finalized. We have no other choice.
878 if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) {
879 finalize(i);
880 continue;
881 }
882
883 float cx = center_x(i);
884 TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1);
885 if (i > 0) {
886 // The preceding character significantly intersects with the
887 // imaginary body of this character. Let Pass2Analyze() handle
888 // this case.
889 if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) {
890 continue;
891 }
892 if (!is_final(i - 1)) {
893 TBOX merged = box(i);
894 merged += box(i - 1);
895 if (merged.width() < row_pitch) {
896 continue;
897 }
898 // This character cannot be finalized yet because it can be
899 // merged with the previous one. Again, let Pass2Analyze()
900 // handle this case.
901 }
902 }
903 if (i < num_chars() - 1) {
904 if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) {
905 continue;
906 }
907 if (!is_final(i + 1)) {
908 TBOX merged = box(i);
909 merged += box(i + 1);
910 if (merged.width() < row_pitch) {
911 continue;
912 }
913 }
914 }
915 finalize(i);
916 }
917
918 // Update alignment decision. We only consider finalized characters
919 // in pass2. E.g. if a finalized character C has another finalized
920 // character L on its left and a not-finalized character R on its
921 // right, we mark C as good if the pitch between C and L is good,
922 // regardless of the pitch between C and R.
923 for (size_t i = 0; i < num_chars(); i++) {
924 if (!is_final(i)) {
925 continue;
926 }
927 bool good_pitch = false;
928 bool bad_pitch = false;
929 if (i > 0 && is_final(i - 1)) {
930 if (is_good_pitch(row_pitch, box(i - 1), box(i))) {
931 good_pitch = true;
932 } else {
933 bad_pitch = true;
934 }
935 }
936 if (i < num_chars() - 1 && is_final(i + 1)) {
937 if (is_good_pitch(row_pitch, box(i), box(i + 1))) {
938 good_pitch = true;
939 } else {
940 bad_pitch = true;
941 }
942 }
943 if (good_pitch && !bad_pitch) {
944 mark_good(i);
945 } else if (!good_pitch && bad_pitch) {
946 mark_bad(i);
947 }
948 }
949 }
950
951 class FPAnalyzer {
952 public:
953 FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks);
954 ~FPAnalyzer() = default;
955
Pass1Analyze()956 void Pass1Analyze() {
957 for (auto &row : rows_) {
958 row.Pass1Analyze();
959 }
960 }
961
962 // Estimate character pitch for each row. The argument pass1 can be
963 // set to true if the function is called after Pass1Analyze(), to
964 // eliminate some redundant computation.
965 void EstimatePitch(bool pass1);
966
maybe_fixed_pitch()967 bool maybe_fixed_pitch() {
968 if (rows_.empty() || rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) {
969 return false;
970 }
971 return true;
972 }
973
MergeFragments()974 void MergeFragments() {
975 for (auto &row : rows_) {
976 row.MergeFragments();
977 }
978 }
979
FinalizeLargeChars()980 void FinalizeLargeChars() {
981 for (auto &row : rows_) {
982 row.FinalizeLargeChars();
983 }
984 }
985
Pass2Analyze()986 bool Pass2Analyze() {
987 bool changed = false;
988 for (auto &row : rows_) {
989 if (row.Pass2Analyze()) {
990 changed = true;
991 }
992 }
993 return changed;
994 }
995
OutputEstimations()996 void OutputEstimations() {
997 for (auto &row : rows_) {
998 row.OutputEstimations();
999 }
1000 // Don't we need page-level estimation of gaps/spaces?
1001 }
1002
DebugOutputResult()1003 void DebugOutputResult() {
1004 tprintf("FPAnalyzer: final result\n");
1005 for (size_t i = 0; i < rows_.size(); i++) {
1006 rows_[i].DebugOutputResult(i);
1007 }
1008 }
1009
num_rows()1010 size_t num_rows() {
1011 return rows_.size();
1012 }
1013
1014 // Returns the upper limit for pass2 loop iteration.
max_iteration()1015 unsigned max_iteration() {
1016 // We're fixing at least one character per iteration. So basically
1017 // we shouldn't require more than max_chars_per_row_ iterations.
1018 return max_chars_per_row_ + 100;
1019 }
1020
1021 private:
1022 ICOORD page_tr_;
1023 std::vector<FPRow> rows_;
1024 unsigned num_tall_rows_;
1025 unsigned num_bad_rows_;
1026 // TODO: num_empty_rows_ is incremented, but never used otherwise.
1027 unsigned num_empty_rows_;
1028 unsigned max_chars_per_row_;
1029 };
1030
FPAnalyzer(ICOORD page_tr,TO_BLOCK_LIST * port_blocks)1031 FPAnalyzer::FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
1032 : page_tr_(page_tr)
1033 , num_tall_rows_(0)
1034 , num_bad_rows_(0)
1035 , num_empty_rows_(0)
1036 , max_chars_per_row_(0) {
1037 TO_BLOCK_IT block_it(port_blocks);
1038
1039 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
1040 TO_BLOCK *block = block_it.data();
1041 if (!block->get_rows()->empty()) {
1042 ASSERT_HOST(block->xheight > 0);
1043 find_repeated_chars(block, false);
1044 }
1045 }
1046
1047 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
1048 TO_ROW_IT row_it = block_it.data()->get_rows();
1049 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1050 FPRow row;
1051 row.Init(row_it.data());
1052 rows_.push_back(row);
1053 size_t num_chars = rows_.back().num_chars();
1054 if (num_chars <= 1) {
1055 num_empty_rows_++;
1056 }
1057 if (num_chars > max_chars_per_row_) {
1058 max_chars_per_row_ = num_chars;
1059 }
1060 }
1061 }
1062 }
1063
EstimatePitch(bool pass1)1064 void FPAnalyzer::EstimatePitch(bool pass1) {
1065 LocalCorrelation pitch_height_stats;
1066
1067 num_tall_rows_ = 0;
1068 num_bad_rows_ = 0;
1069 pitch_height_stats.Clear();
1070 for (auto &row : rows_) {
1071 row.EstimatePitch(pass1);
1072 if (row.good_pitches()) {
1073 pitch_height_stats.Add(row.height() + row.gap(), row.pitch(), row.good_pitches());
1074 if (row.height_pitch_ratio() > 1.1) {
1075 num_tall_rows_++;
1076 }
1077 } else {
1078 num_bad_rows_++;
1079 }
1080 }
1081
1082 pitch_height_stats.Finish();
1083 for (auto &row : rows_) {
1084 if (row.good_pitches() >= 5) {
1085 // We have enough evidences. Just use the pitch estimation
1086 // from this row.
1087 row.set_estimated_pitch(row.pitch());
1088 } else if (row.num_chars() > 1) {
1089 float estimated_pitch = pitch_height_stats.EstimateYFor(row.height() + row.gap(), 0.1f);
1090 // CJK characters are more likely to be fragmented than poorly
1091 // chopped. So trust the page-level estimation of character
1092 // pitch only if it's larger than row-level estimation or
1093 // row-level estimation is too large (2x bigger than row height).
1094 if (estimated_pitch > row.pitch() || row.pitch() > row.height() * 2.0) {
1095 row.set_estimated_pitch(estimated_pitch);
1096 } else {
1097 row.set_estimated_pitch(row.pitch());
1098 }
1099 }
1100 }
1101 }
1102
compute_fixed_pitch_cjk(ICOORD page_tr,TO_BLOCK_LIST * port_blocks)1103 void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) {
1104 FPAnalyzer analyzer(page_tr, port_blocks);
1105 if (analyzer.num_rows() == 0) {
1106 return;
1107 }
1108
1109 analyzer.Pass1Analyze();
1110 analyzer.EstimatePitch(true);
1111
1112 // Perform pass1 analysis again with the initial estimation of row
1113 // pitches, for better estimation.
1114 analyzer.Pass1Analyze();
1115 analyzer.EstimatePitch(true);
1116
1117 // Early exit if the page doesn't seem to contain fixed pitch rows.
1118 if (!analyzer.maybe_fixed_pitch()) {
1119 if (textord_debug_pitch_test) {
1120 tprintf("Page doesn't seem to contain fixed pitch rows\n");
1121 }
1122 return;
1123 }
1124
1125 unsigned iteration = 0;
1126 do {
1127 analyzer.MergeFragments();
1128 analyzer.FinalizeLargeChars();
1129 analyzer.EstimatePitch(false);
1130 iteration++;
1131 } while (analyzer.Pass2Analyze() && iteration < analyzer.max_iteration());
1132
1133 if (textord_debug_pitch_test) {
1134 tprintf("compute_fixed_pitch_cjk finished after %u iteration (limit=%u)\n", iteration,
1135 analyzer.max_iteration());
1136 }
1137
1138 analyzer.OutputEstimations();
1139 if (textord_debug_pitch_test) {
1140 analyzer.DebugOutputResult();
1141 }
1142 }
1143
1144 } // namespace tesseract
1145