1 ///////////////////////////////////////////////////////////////////////
2 // File:        cjkpitch.cpp
3 // Description: Code to determine fixed pitchness and the pitch if fixed,
4 //              for CJK text.
5 // Author:      takenaka@google.com (Hiroshi Takenaka)
6 //
7 // Copyright 2011 Google Inc. All Rights Reserved.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19 
20 #include "cjkpitch.h"
21 #include "topitch.h"
22 #include "tovars.h"
23 
24 #include <algorithm> // for std::sort
25 #include <cmath>
26 #include <vector>    // for std::vector
27 
28 namespace tesseract {
29 
30 static BOOL_VAR(textord_space_size_is_variable, false,
31                 "If true, word delimiter spaces are assumed to have "
32                 "variable width, even though characters have fixed pitch.");
33 
34 // Allow +/-10% error for character pitch / body size.
35 static const float kFPTolerance = 0.1f;
36 
37 // Minimum ratio of "good" character pitch for a row to be considered
38 // to be fixed-pitch.
39 static const float kFixedPitchThreshold = 0.35f;
40 
41 // rank statistics for a small collection of float values.
42 class SimpleStats {
43 public:
44   SimpleStats() = default;
45   ~SimpleStats() = default;
46 
Clear()47   void Clear() {
48     values_.clear();
49     finalized_ = false;
50   }
51 
Add(float value)52   void Add(float value) {
53     values_.push_back(value);
54     finalized_ = false;
55   }
56 
Finish()57   void Finish() {
58     std::sort(values_.begin(), values_.end());
59     finalized_ = true;
60   }
61 
ile(double frac)62   float ile(double frac) {
63     if (!finalized_) {
64       Finish();
65     }
66     if (values_.empty()) {
67       return 0.0f;
68     }
69     if (frac >= 1.0) {
70       return values_.back();
71     }
72     if (frac <= 0.0 || values_.size() == 1) {
73       return values_[0];
74     }
75     int index = static_cast<int>((values_.size() - 1) * frac);
76     float reminder = (values_.size() - 1) * frac - index;
77 
78     return values_[index] * (1.0f - reminder) + values_[index + 1] * reminder;
79   }
80 
median()81   float median() {
82     return ile(0.5);
83   }
84 
minimum()85   float minimum() {
86     if (!finalized_) {
87       Finish();
88     }
89     if (values_.empty()) {
90       return 0.0f;
91     }
92     return values_[0];
93   }
94 
empty() const95   bool empty() const {
96     return values_.empty();
97   }
98 
size() const99   int size() const {
100     return values_.size();
101   }
102 
103 private:
104   bool finalized_ = false;
105   std::vector<float> values_;
106 };
107 
108 // statistics for a small collection of float pairs (x, y).
109 // EstimateYFor(x, r) returns the estimated y at x, based on
110 // existing samples between x*(1-r) ~ x*(1+r).
111 class LocalCorrelation {
112 public:
113   struct float_pair {
114     float x, y;
115     int vote;
116   };
117 
LocalCorrelation()118   LocalCorrelation() : finalized_(false) {}
119   ~LocalCorrelation() = default;
120 
Finish()121   void Finish() {
122     std::sort(values_.begin(), values_.end(), float_pair_compare);
123     finalized_ = true;
124   }
125 
Clear()126   void Clear() {
127     finalized_ = false;
128   }
129 
Add(float x,float y,int v)130   void Add(float x, float y, int v) {
131     struct float_pair value;
132     value.x = x;
133     value.y = y;
134     value.vote = v;
135     values_.push_back(value);
136     finalized_ = false;
137   }
138 
EstimateYFor(float x,float r)139   float EstimateYFor(float x, float r) {
140     ASSERT_HOST(finalized_);
141     unsigned start = 0, end = values_.size();
142     // Because the number of samples (used_) is assumed to be small,
143     // just use linear search to find values within the range.
144     while (start < values_.size() && values_[start].x < x * (1 - r)) {
145       start++;
146     }
147     while (end > 0 && values_[end - 1].x > x * (1 + r)) {
148       end--;
149     }
150 
151     // Fall back to the global average if there are no data within r
152     // of x.
153     if (start >= end) {
154       start = 0;
155       end = values_.size();
156     }
157 
158     // Compute weighted average of the values.
159     float rc = 0;
160     int vote = 0;
161     for (auto i = start; i < end; i++) {
162       rc += values_[i].vote * x * values_[i].y / values_[i].x;
163       vote += values_[i].vote;
164     }
165 
166     return vote == 0 ? 0.0f : rc / vote;
167   }
168 
169 private:
float_pair_compare(const float_pair f_a,const float_pair f_b)170   static bool float_pair_compare(const float_pair f_a, const float_pair f_b) {
171     return f_a.x < f_b.x;
172   }
173 
174   bool finalized_;
175   std::vector<struct float_pair> values_;
176 };
177 
178 // Class to represent a character on a fixed pitch row.  A FPChar may
179 // consist of multiple blobs (BLOBNBOX's).
180 class FPChar {
181 public:
182   enum Alignment { ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD };
183 
FPChar()184   FPChar()
185       : box_()
186       , real_body_()
187       , from_(nullptr)
188       , to_(nullptr)
189       , num_blobs_(0)
190       , max_gap_(0)
191       , final_(false)
192       , alignment_(ALIGN_UNKNOWN)
193       , merge_to_prev_(false)
194       , delete_flag_(false) {}
195 
196   // Initialize from blob.
Init(BLOBNBOX * blob)197   void Init(BLOBNBOX *blob) {
198     box_ = blob->bounding_box();
199     real_body_ = box_;
200     from_ = to_ = blob;
201     num_blobs_ = 1;
202   }
203 
204   // Merge this character with "next". The "next" character should
205   // consist of succeeding blobs on the same row.
Merge(const FPChar & next)206   void Merge(const FPChar &next) {
207     int gap = real_body_.x_gap(next.real_body_);
208     if (gap > max_gap_) {
209       max_gap_ = gap;
210     }
211 
212     box_ += next.box_;
213     real_body_ += next.real_body_;
214     to_ = next.to_;
215     num_blobs_ += next.num_blobs_;
216   }
217 
218   // Accessors.
box() const219   const TBOX &box() const {
220     return box_;
221   }
set_box(const TBOX & box)222   void set_box(const TBOX &box) {
223     box_ = box;
224   }
real_body() const225   const TBOX &real_body() const {
226     return real_body_;
227   }
228 
is_final() const229   bool is_final() const {
230     return final_;
231   }
set_final(bool flag)232   void set_final(bool flag) {
233     final_ = flag;
234   }
235 
alignment() const236   const Alignment &alignment() const {
237     return alignment_;
238   }
set_alignment(Alignment alignment)239   void set_alignment(Alignment alignment) {
240     alignment_ = alignment;
241   }
242 
merge_to_prev() const243   bool merge_to_prev() const {
244     return merge_to_prev_;
245   }
set_merge_to_prev(bool flag)246   void set_merge_to_prev(bool flag) {
247     merge_to_prev_ = flag;
248   }
249 
delete_flag() const250   bool delete_flag() const {
251     return delete_flag_;
252   }
set_delete_flag(bool flag)253   void set_delete_flag(bool flag) {
254     delete_flag_ = flag;
255   }
256 
max_gap() const257   int max_gap() const {
258     return max_gap_;
259   }
260 
num_blobs() const261   int num_blobs() const {
262     return num_blobs_;
263   }
264 
265 private:
266   TBOX box_; // Rectangle region considered to be occupied by this
267   // character.  It could be bigger than the bounding box.
268   TBOX real_body_; // Real bounding box of this character.
269   BLOBNBOX *from_; // The first blob of this character.
270   BLOBNBOX *to_;   // The last blob of this character.
271   int num_blobs_;  // Number of blobs that belong to this character.
272   int max_gap_;    // Maximum x gap between the blobs.
273 
274   bool final_; // True if alignment/fragmentation decision for this
275   // character is finalized.
276 
277   Alignment alignment_; // Alignment status.
278   bool merge_to_prev_;  // True if this is a fragmented blob that
279   // needs to be merged to the previous
280   // character.
281 
282   int delete_flag_; // True if this character is merged to another
283                     // one and needs to be deleted.
284 };
285 
286 // Class to represent a fixed pitch row, as a linear collection of
287 // FPChar's.
288 class FPRow {
289 public:
FPRow()290   FPRow() : all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(), heights_(), characters_() {}
291 
292   ~FPRow() = default;
293 
294   // Initialize from TD_ROW.
295   void Init(TO_ROW *row);
296 
297   // Estimate character pitch of this row, based on current alignment
298   // status of underlying FPChar's.  The argument pass1 can be set to
299   // true if the function is called after Pass1Analyze(), to eliminate
300   // some redundant computation.
301   void EstimatePitch(bool pass1);
302 
303   // Check each character if it has good character pitches between its
304   // predecessor and its successor and set its alignment status.  If
305   // we already calculated the estimated pitch for this row, the value
306   // is used.  If we didn't, a character is considered to be good, if
307   // the pitches between its predecessor and its successor are almost
308   // equal.
309   void Pass1Analyze();
310 
311   // Find characters that fit nicely into one imaginary body next to a
312   // character which is already finalized. Then mark them as character
313   // fragments.
314   bool Pass2Analyze();
315 
316   // Merge FPChars marked as character fragments into one.
317   void MergeFragments();
318 
319   // Finalize characters that are already large enough and cannot be
320   // merged with others any more.
321   void FinalizeLargeChars();
322 
323   // Output pitch estimation results to attributes of TD_ROW.
324   void OutputEstimations();
325 
326   void DebugOutputResult(int row_index);
327 
good_pitches()328   int good_pitches() {
329     return good_pitches_.size();
330   }
331 
pitch()332   float pitch() {
333     return pitch_;
334   }
335 
estimated_pitch()336   float estimated_pitch() {
337     return estimated_pitch_;
338   }
339 
set_estimated_pitch(float v)340   void set_estimated_pitch(float v) {
341     estimated_pitch_ = v;
342   }
343 
height()344   float height() {
345     return height_;
346   }
347 
height_pitch_ratio()348   float height_pitch_ratio() {
349     if (good_pitches_.size() < 2) {
350       return -1.0;
351     }
352     return height_ / good_pitches_.median();
353   }
354 
gap()355   float gap() {
356     return gap_;
357   }
358 
num_chars()359   size_t num_chars() {
360     return characters_.size();
361   }
character(int i)362   FPChar *character(int i) {
363     return &characters_[i];
364   }
365 
box(int i)366   const TBOX &box(int i) {
367     return characters_[i].box();
368   }
369 
real_body(int i)370   const TBOX &real_body(int i) {
371     return characters_[i].real_body();
372   }
373 
is_box_modified(int i)374   bool is_box_modified(int i) {
375     return !(characters_[i].box() == characters_[i].real_body());
376   }
377 
center_x(int i)378   float center_x(int i) {
379     return (characters_[i].box().left() + characters_[i].box().right()) / 2.0;
380   }
381 
is_final(int i)382   bool is_final(int i) {
383     return characters_[i].is_final();
384   }
385 
finalize(int i)386   void finalize(int i) {
387     characters_[i].set_final(true);
388   }
389 
is_good(int i)390   bool is_good(int i) {
391     return characters_[i].alignment() == FPChar::ALIGN_GOOD;
392   }
393 
mark_good(int i)394   void mark_good(int i) {
395     characters_[i].set_alignment(FPChar::ALIGN_GOOD);
396   }
397 
mark_bad(int i)398   void mark_bad(int i) {
399     characters_[i].set_alignment(FPChar::ALIGN_BAD);
400   }
401 
clear_alignment(int i)402   void clear_alignment(int i) {
403     characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN);
404   }
405 
406 private:
x_overlap_fraction(const TBOX & box1,const TBOX & box2)407   static float x_overlap_fraction(const TBOX &box1, const TBOX &box2) {
408     if (std::min(box1.width(), box2.width()) == 0) {
409       return 0.0;
410     }
411     return -box1.x_gap(box2) / static_cast<float>(std::min(box1.width(), box2.width()));
412   }
413 
mostly_overlap(const TBOX & box1,const TBOX & box2)414   static bool mostly_overlap(const TBOX &box1, const TBOX &box2) {
415     return x_overlap_fraction(box1, box2) > 0.9;
416   }
417 
significant_overlap(const TBOX & box1,const TBOX & box2)418   static bool significant_overlap(const TBOX &box1, const TBOX &box2) {
419     if (std::min(box1.width(), box2.width()) == 0) {
420       return false;
421     }
422     int overlap = -box1.x_gap(box2);
423     return overlap > 1 || x_overlap_fraction(box1, box2) > 0.1;
424   }
425 
box_pitch(const TBOX & ref,const TBOX & box)426   static float box_pitch(const TBOX &ref, const TBOX &box) {
427     return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0;
428   }
429 
430   // Check if two neighboring characters satisfy the fixed pitch model.
is_good_pitch(float pitch,const TBOX & box1,const TBOX & box2)431   static bool is_good_pitch(float pitch, const TBOX &box1, const TBOX &box2) {
432     // Character box shouldn't exceed pitch.
433     if (box1.width() >= pitch * (1.0 + kFPTolerance) ||
434         box2.width() >= pitch * (1.0 + kFPTolerance) ||
435         box1.height() >= pitch * (1.0 + kFPTolerance) ||
436         box2.height() >= pitch * (1.0 + kFPTolerance)) {
437       return false;
438     }
439 
440     const float real_pitch = box_pitch(box1, box2);
441     if (std::fabs(real_pitch - pitch) < pitch * kFPTolerance) {
442       return true;
443     }
444 
445     if (textord_space_size_is_variable) {
446       // Hangul characters usually have fixed pitch, but words are
447       // delimited by space which can be narrower than characters.
448       if (real_pitch > pitch && real_pitch < pitch * 2.0 && real_pitch - box1.x_gap(box2) < pitch) {
449         return true;
450       }
451     }
452     return false;
453   }
454 
is_interesting_blob(const BLOBNBOX * blob)455   static bool is_interesting_blob(const BLOBNBOX *blob) {
456     return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER;
457   }
458 
459   // Cleanup chars that are already merged to others.
DeleteChars()460   void DeleteChars() {
461     unsigned index = 0;
462     for (unsigned i = 0; i < characters_.size(); ++i) {
463       if (!characters_[i].delete_flag()) {
464         if (index != i) {
465           characters_[index] = characters_[i];
466         }
467         index++;
468       }
469     }
470     characters_.resize(index);
471   }
472 
473   float pitch_ = 0.0f;           // Character pitch.
474   float estimated_pitch_ = 0.0f; // equal to pitch_ if pitch_ is considered
475   // to be good enough.
476   float height_ = 0.0f; // Character height.
477   float gap_ = 0.0f;    // Minimum gap between characters.
478 
479   // Pitches between any two successive characters.
480   SimpleStats all_pitches_;
481   // Gaps between any two successive characters.
482   SimpleStats all_gaps_;
483   // Pitches between any two successive characters that are consistent
484   // with the fixed pitch model.
485   SimpleStats good_pitches_;
486   // Gaps between any two successive characters that are consistent
487   // with the fixed pitch model.
488   SimpleStats good_gaps_;
489 
490   SimpleStats heights_;
491 
492   std::vector<FPChar> characters_;
493   TO_ROW *real_row_ = nullptr; // Underlying TD_ROW for this row.
494 };
495 
Init(TO_ROW * row)496 void FPRow::Init(TO_ROW *row) {
497   ASSERT_HOST(row != nullptr);
498   ASSERT_HOST(row->xheight > 0);
499   real_row_ = row;
500   real_row_->pitch_decision = PITCH_CORR_PROP; // Default decision.
501 
502   BLOBNBOX_IT blob_it = row->blob_list();
503   // Initialize characters_ and compute the initial estimation of
504   // character height.
505   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
506     if (is_interesting_blob(blob_it.data())) {
507       FPChar fp_char;
508       fp_char.Init(blob_it.data());
509       // Merge unconditionally if two blobs overlap.
510       if (!characters_.empty() && significant_overlap(fp_char.box(), characters_.back().box())) {
511         characters_.back().Merge(fp_char);
512       } else {
513         characters_.push_back(fp_char);
514       }
515       TBOX bound = blob_it.data()->bounding_box();
516       if (bound.height() * 3.0 > bound.width()) {
517         heights_.Add(bound.height());
518       }
519     }
520   }
521   heights_.Finish();
522   height_ = heights_.ile(0.875);
523 }
524 
OutputEstimations()525 void FPRow::OutputEstimations() {
526   if (good_pitches_.empty()) {
527     pitch_ = 0.0f;
528     real_row_->pitch_decision = PITCH_CORR_PROP;
529     return;
530   }
531 
532   pitch_ = good_pitches_.median();
533   real_row_->fixed_pitch = pitch_;
534   // good_gaps_.ile(0.125) can be large if most characters on the row
535   // are skinny. Use pitch_ - height_ instead if it's smaller, but
536   // positive.
537   real_row_->kern_size = real_row_->pr_nonsp =
538       std::min(good_gaps_.ile(0.125), std::max(pitch_ - height_, 0.0f));
539   real_row_->body_size = pitch_ - real_row_->kern_size;
540 
541   if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) {
542     // If more than half of the characters of a line don't fit to the
543     // fixed pitch model, consider the line to be proportional. 50%
544     // seems to be a good threshold in practice as well.
545     // Anyway we store estimated values (fixed_pitch, kern_size, etc.) in
546     // real_row_ as a partial estimation result and try to use them in the
547     // normalization process.
548     real_row_->pitch_decision = PITCH_CORR_PROP;
549     return;
550   } else if (good_pitches_.size() > all_pitches_.size() * 0.75) {
551     real_row_->pitch_decision = PITCH_DEF_FIXED;
552   } else {
553     real_row_->pitch_decision = PITCH_CORR_FIXED;
554   }
555 
556   real_row_->space_size = real_row_->pr_space = pitch_;
557   // Set min_space to 50% of character pitch so that we can break CJK
558   // text at a half-width space after punctuation.
559   real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5;
560 
561   // Don't consider a quarter space as a real space, because it's used
562   // for line justification in traditional Japanese books.
563   real_row_->max_nonspace =
564       std::max(pitch_ * 0.25 + good_gaps_.minimum(), static_cast<double>(good_gaps_.ile(0.875)));
565 
566   int space_threshold = std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
567                                  static_cast<int>(real_row_->xheight));
568 
569   // Make max_nonspace larger than any intra-character gap so that
570   // make_prop_words() won't break a row at the middle of a character.
571   for (size_t i = 0; i < num_chars(); ++i) {
572     if (characters_[i].max_gap() > real_row_->max_nonspace) {
573       real_row_->max_nonspace = characters_[i].max_gap();
574     }
575   }
576   real_row_->space_threshold = std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
577                                         static_cast<int>(real_row_->xheight));
578   real_row_->used_dm_model = false;
579 
580   // Setup char_cells.
581   ICOORDELT_IT cell_it = &real_row_->char_cells;
582   auto *cell = new ICOORDELT(real_body(0).left(), 0);
583   cell_it.add_after_then_move(cell);
584 
585   int right = real_body(0).right();
586   for (size_t i = 1; i < num_chars(); ++i) {
587     // Put a word break if gap between two characters is bigger than
588     // space_threshold.  Don't break if none of two characters
589     // couldn't be "finalized", because maybe they need to be merged
590     // to one character.
591     if ((is_final(i - 1) || is_final(i)) &&
592         real_body(i - 1).x_gap(real_body(i)) > space_threshold) {
593       cell = new ICOORDELT(right + 1, 0);
594       cell_it.add_after_then_move(cell);
595       while (right + pitch_ < box(i).left()) {
596         right += pitch_;
597         cell = new ICOORDELT(right + 1, 0);
598         cell_it.add_after_then_move(cell);
599       }
600       right = box(i).left();
601     }
602     cell = new ICOORDELT((right + real_body(i).left()) / 2, 0);
603     cell_it.add_after_then_move(cell);
604     right = real_body(i).right();
605   }
606 
607   cell = new ICOORDELT(right + 1, 0);
608   cell_it.add_after_then_move(cell);
609 
610   // TODO(takenaka): add code to store alignment/fragmentation
611   // information to blobs so that it can be reused later, e.g. in
612   // recognition phase.
613 }
614 
EstimatePitch(bool pass1)615 void FPRow::EstimatePitch(bool pass1) {
616   good_pitches_.Clear();
617   all_pitches_.Clear();
618   good_gaps_.Clear();
619   all_gaps_.Clear();
620   heights_.Clear();
621   if (num_chars() == 0) {
622     return;
623   }
624 
625   int32_t cx0, cx1;
626   bool prev_was_good = is_good(0);
627   cx0 = center_x(0);
628 
629   heights_.Add(box(0).height());
630   for (size_t i = 1; i < num_chars(); i++) {
631     cx1 = center_x(i);
632     int32_t pitch = cx1 - cx0;
633     int32_t gap = std::max(0, real_body(i - 1).x_gap(real_body(i)));
634 
635     heights_.Add(box(i).height());
636     // Ignore if the pitch is too close.  But don't ignore wide pitch
637     // may be the result of large tracking.
638     if (pitch > height_ * 0.5) {
639       all_pitches_.Add(pitch);
640       all_gaps_.Add(gap);
641       if (is_good(i)) {
642         // In pass1 (after Pass1Analyze()), all characters marked as
643         // "good" have a good consistent pitch with their previous
644         // characters.  However, it's not true in pass2 and a good
645         // character may have a good pitch only between its successor.
646         // So we collect only pitch values between two good
647         // characters. and within tolerance in pass2.
648         if (pass1 ||
649             (prev_was_good && std::fabs(estimated_pitch_ - pitch) < kFPTolerance * estimated_pitch_)) {
650           good_pitches_.Add(pitch);
651           if (!is_box_modified(i - 1) && !is_box_modified(i)) {
652             good_gaps_.Add(gap);
653           }
654         }
655         prev_was_good = true;
656       } else {
657         prev_was_good = false;
658       }
659     }
660     cx0 = cx1;
661   }
662 
663   good_pitches_.Finish();
664   all_pitches_.Finish();
665   good_gaps_.Finish();
666   all_gaps_.Finish();
667   heights_.Finish();
668 
669   height_ = heights_.ile(0.875);
670   if (all_pitches_.empty()) {
671     pitch_ = 0.0f;
672     gap_ = 0.0f;
673   } else if (good_pitches_.size() < 2) {
674     // We don't have enough data to estimate the pitch of this row yet.
675     // Use median of all pitches as the initial guess.
676     pitch_ = all_pitches_.median();
677     ASSERT_HOST(pitch_ > 0.0f);
678     gap_ = all_gaps_.ile(0.125);
679   } else {
680     pitch_ = good_pitches_.median();
681     ASSERT_HOST(pitch_ > 0.0f);
682     gap_ = good_gaps_.ile(0.125);
683   }
684 }
685 
DebugOutputResult(int row_index)686 void FPRow::DebugOutputResult(int row_index) {
687   if (num_chars() > 0) {
688     tprintf(
689         "Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, "
690         "space_size=%f, space_threshold=%d, xheight=%f\n",
691         row_index, static_cast<int>(real_row_->pitch_decision), real_row_->fixed_pitch,
692         real_row_->max_nonspace, real_row_->space_size, real_row_->space_threshold,
693         real_row_->xheight);
694 
695     for (unsigned i = 0; i < num_chars(); i++) {
696       tprintf("Char %u: is_final=%d is_good=%d num_blobs=%d: ", i, is_final(i), is_good(i),
697               character(i)->num_blobs());
698       box(i).print();
699     }
700   }
701 }
702 
Pass1Analyze()703 void FPRow::Pass1Analyze() {
704   if (num_chars() < 2) {
705     return;
706   }
707 
708   if (estimated_pitch_ > 0.0f) {
709     for (size_t i = 2; i < num_chars(); i++) {
710       if (is_good_pitch(estimated_pitch_, box(i - 2), box(i - 1)) &&
711           is_good_pitch(estimated_pitch_, box(i - 1), box(i))) {
712         mark_good(i - 1);
713       }
714     }
715   } else {
716     for (size_t i = 2; i < num_chars(); i++) {
717       if (is_good_pitch(box_pitch(box(i - 2), box(i - 1)), box(i - 1), box(i))) {
718         mark_good(i - 1);
719       }
720     }
721   }
722   character(0)->set_alignment(character(1)->alignment());
723   character(num_chars() - 1)->set_alignment(character(num_chars() - 2)->alignment());
724 }
725 
Pass2Analyze()726 bool FPRow::Pass2Analyze() {
727   bool changed = false;
728   if (num_chars() <= 1 || estimated_pitch_ == 0.0f) {
729     return false;
730   }
731   for (size_t i = 0; i < num_chars(); i++) {
732     if (is_final(i)) {
733       continue;
734     }
735 
736     FPChar::Alignment alignment = character(i)->alignment();
737     bool intersecting = false;
738     bool not_intersecting = false;
739 
740     if (i < num_chars() - 1 && is_final(i + 1)) {
741       // Next character is already finalized. Estimate the imaginary
742       // body including this character based on the character. Skip
743       // whitespace if necessary.
744       bool skipped_whitespaces = false;
745       float c1 = center_x(i + 1) - 1.5 * estimated_pitch_;
746       while (c1 > box(i).right()) {
747         skipped_whitespaces = true;
748         c1 -= estimated_pitch_;
749       }
750       TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top());
751 
752       // Collect all characters that mostly fit in the region.
753       // Also, their union height shouldn't be too big.
754       int j = i;
755       TBOX merged;
756       while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) &&
757              merged.bounding_union(box(j)).height() < estimated_pitch_ * (1 + kFPTolerance)) {
758         merged += box(j);
759         j--;
760       }
761 
762       if (j >= 0 && significant_overlap(ibody, box(j))) {
763         // character(j) lies on the character boundary and doesn't fit
764         // well into the imaginary body.
765         if (!is_final(j)) {
766           intersecting = true;
767         }
768       } else {
769         not_intersecting = true;
770         if (i - j > 0) {
771           // Merge character(j+1) ... character(i) because they fit
772           // into the body nicely.
773           if (i - j == 1) {
774             // Only one char in the imaginary body.
775             if (!skipped_whitespaces) {
776               mark_good(i);
777             }
778             // set ibody as bounding box of this character to get
779             // better pitch analysis result for halfwidth glyphs
780             // followed by a halfwidth space.
781             if (box(i).width() <= estimated_pitch_ * 0.5) {
782               ibody += box(i);
783               character(i)->set_box(ibody);
784             }
785             character(i)->set_merge_to_prev(false);
786             finalize(i);
787           } else {
788             for (int k = i; k > j + 1; k--) {
789               character(k)->set_merge_to_prev(true);
790             }
791           }
792         }
793       }
794     }
795     if (i > 0 && is_final(i - 1)) {
796       // Now we repeat everything from the opposite side.  Previous
797       // character is already finalized. Estimate the imaginary body
798       // including this character based on the character.
799       bool skipped_whitespaces = false;
800       float c1 = center_x(i - 1) + 1.5 * estimated_pitch_;
801       while (c1 < box(i).left()) {
802         skipped_whitespaces = true;
803         c1 += estimated_pitch_;
804       }
805       TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top());
806 
807       size_t j = i;
808       TBOX merged;
809       while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) &&
810              merged.bounding_union(box(j)).height() < estimated_pitch_ * (1 + kFPTolerance)) {
811         merged += box(j);
812         j++;
813       }
814 
815       if (j < num_chars() && significant_overlap(ibody, box(j))) {
816         if (!is_final(j)) {
817           intersecting = true;
818         }
819       } else {
820         not_intersecting = true;
821         if (j - i > 0) {
822           if (j - i == 1) {
823             if (!skipped_whitespaces) {
824               mark_good(i);
825             }
826             if (box(i).width() <= estimated_pitch_ * 0.5) {
827               ibody += box(i);
828               character(i)->set_box(ibody);
829             }
830             character(i)->set_merge_to_prev(false);
831             finalize(i);
832           } else {
833             for (size_t k = i + 1; k < j; k++) {
834               character(k)->set_merge_to_prev(true);
835             }
836           }
837         }
838       }
839     }
840 
841     // This character doesn't fit well into the estimated imaginary
842     // bodies. Mark it as bad.
843     if (intersecting && !not_intersecting) {
844       mark_bad(i);
845     }
846     if (character(i)->alignment() != alignment || character(i)->merge_to_prev()) {
847       changed = true;
848     }
849   }
850 
851   return changed;
852 }
853 
MergeFragments()854 void FPRow::MergeFragments() {
855   int last_char = 0;
856 
857   for (size_t j = 0; j < num_chars(); ++j) {
858     if (character(j)->merge_to_prev()) {
859       character(last_char)->Merge(*character(j));
860       character(j)->set_delete_flag(true);
861       clear_alignment(last_char);
862       character(j - 1)->set_merge_to_prev(false);
863     } else {
864       last_char = j;
865     }
866   }
867   DeleteChars();
868 }
869 
FinalizeLargeChars()870 void FPRow::FinalizeLargeChars() {
871   float row_pitch = estimated_pitch();
872   for (size_t i = 0; i < num_chars(); i++) {
873     if (is_final(i)) {
874       continue;
875     }
876 
877     // Finalize if both neighbors are finalized. We have no other choice.
878     if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) {
879       finalize(i);
880       continue;
881     }
882 
883     float cx = center_x(i);
884     TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1);
885     if (i > 0) {
886       // The preceding character significantly intersects with the
887       // imaginary body of this character. Let Pass2Analyze() handle
888       // this case.
889       if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) {
890         continue;
891       }
892       if (!is_final(i - 1)) {
893         TBOX merged = box(i);
894         merged += box(i - 1);
895         if (merged.width() < row_pitch) {
896           continue;
897         }
898         // This character cannot be finalized yet because it can be
899         // merged with the previous one.  Again, let Pass2Analyze()
900         // handle this case.
901       }
902     }
903     if (i < num_chars() - 1) {
904       if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) {
905         continue;
906       }
907       if (!is_final(i + 1)) {
908         TBOX merged = box(i);
909         merged += box(i + 1);
910         if (merged.width() < row_pitch) {
911           continue;
912         }
913       }
914     }
915     finalize(i);
916   }
917 
918   // Update alignment decision.  We only consider finalized characters
919   // in pass2.  E.g. if a finalized character C has another finalized
920   // character L on its left and a not-finalized character R on its
921   // right, we mark C as good if the pitch between C and L is good,
922   // regardless of the pitch between C and R.
923   for (size_t i = 0; i < num_chars(); i++) {
924     if (!is_final(i)) {
925       continue;
926     }
927     bool good_pitch = false;
928     bool bad_pitch = false;
929     if (i > 0 && is_final(i - 1)) {
930       if (is_good_pitch(row_pitch, box(i - 1), box(i))) {
931         good_pitch = true;
932       } else {
933         bad_pitch = true;
934       }
935     }
936     if (i < num_chars() - 1 && is_final(i + 1)) {
937       if (is_good_pitch(row_pitch, box(i), box(i + 1))) {
938         good_pitch = true;
939       } else {
940         bad_pitch = true;
941       }
942     }
943     if (good_pitch && !bad_pitch) {
944       mark_good(i);
945     } else if (!good_pitch && bad_pitch) {
946       mark_bad(i);
947     }
948   }
949 }
950 
951 class FPAnalyzer {
952 public:
953   FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks);
954   ~FPAnalyzer() = default;
955 
Pass1Analyze()956   void Pass1Analyze() {
957     for (auto &row : rows_) {
958       row.Pass1Analyze();
959     }
960   }
961 
962   // Estimate character pitch for each row.  The argument pass1 can be
963   // set to true if the function is called after Pass1Analyze(), to
964   // eliminate some redundant computation.
965   void EstimatePitch(bool pass1);
966 
maybe_fixed_pitch()967   bool maybe_fixed_pitch() {
968     if (rows_.empty() || rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) {
969       return false;
970     }
971     return true;
972   }
973 
MergeFragments()974   void MergeFragments() {
975     for (auto &row : rows_) {
976       row.MergeFragments();
977     }
978   }
979 
FinalizeLargeChars()980   void FinalizeLargeChars() {
981     for (auto &row : rows_) {
982       row.FinalizeLargeChars();
983     }
984   }
985 
Pass2Analyze()986   bool Pass2Analyze() {
987     bool changed = false;
988     for (auto &row : rows_) {
989       if (row.Pass2Analyze()) {
990         changed = true;
991       }
992     }
993     return changed;
994   }
995 
OutputEstimations()996   void OutputEstimations() {
997     for (auto &row : rows_) {
998       row.OutputEstimations();
999     }
1000     // Don't we need page-level estimation of gaps/spaces?
1001   }
1002 
DebugOutputResult()1003   void DebugOutputResult() {
1004     tprintf("FPAnalyzer: final result\n");
1005     for (size_t i = 0; i < rows_.size(); i++) {
1006       rows_[i].DebugOutputResult(i);
1007     }
1008   }
1009 
num_rows()1010   size_t num_rows() {
1011     return rows_.size();
1012   }
1013 
1014   // Returns the upper limit for pass2 loop iteration.
max_iteration()1015   unsigned max_iteration() {
1016     // We're fixing at least one character per iteration. So basically
1017     // we shouldn't require more than max_chars_per_row_ iterations.
1018     return max_chars_per_row_ + 100;
1019   }
1020 
1021 private:
1022   ICOORD page_tr_;
1023   std::vector<FPRow> rows_;
1024   unsigned num_tall_rows_;
1025   unsigned num_bad_rows_;
1026   // TODO: num_empty_rows_ is incremented, but never used otherwise.
1027   unsigned num_empty_rows_;
1028   unsigned max_chars_per_row_;
1029 };
1030 
FPAnalyzer(ICOORD page_tr,TO_BLOCK_LIST * port_blocks)1031 FPAnalyzer::FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
1032     : page_tr_(page_tr)
1033     , num_tall_rows_(0)
1034     , num_bad_rows_(0)
1035     , num_empty_rows_(0)
1036     , max_chars_per_row_(0) {
1037   TO_BLOCK_IT block_it(port_blocks);
1038 
1039   for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
1040     TO_BLOCK *block = block_it.data();
1041     if (!block->get_rows()->empty()) {
1042       ASSERT_HOST(block->xheight > 0);
1043       find_repeated_chars(block, false);
1044     }
1045   }
1046 
1047   for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
1048     TO_ROW_IT row_it = block_it.data()->get_rows();
1049     for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1050       FPRow row;
1051       row.Init(row_it.data());
1052       rows_.push_back(row);
1053       size_t num_chars = rows_.back().num_chars();
1054       if (num_chars <= 1) {
1055         num_empty_rows_++;
1056       }
1057       if (num_chars > max_chars_per_row_) {
1058         max_chars_per_row_ = num_chars;
1059       }
1060     }
1061   }
1062 }
1063 
EstimatePitch(bool pass1)1064 void FPAnalyzer::EstimatePitch(bool pass1) {
1065   LocalCorrelation pitch_height_stats;
1066 
1067   num_tall_rows_ = 0;
1068   num_bad_rows_ = 0;
1069   pitch_height_stats.Clear();
1070   for (auto &row : rows_) {
1071     row.EstimatePitch(pass1);
1072     if (row.good_pitches()) {
1073       pitch_height_stats.Add(row.height() + row.gap(), row.pitch(), row.good_pitches());
1074       if (row.height_pitch_ratio() > 1.1) {
1075         num_tall_rows_++;
1076       }
1077     } else {
1078       num_bad_rows_++;
1079     }
1080   }
1081 
1082   pitch_height_stats.Finish();
1083   for (auto &row : rows_) {
1084     if (row.good_pitches() >= 5) {
1085       // We have enough evidences. Just use the pitch estimation
1086       // from this row.
1087       row.set_estimated_pitch(row.pitch());
1088     } else if (row.num_chars() > 1) {
1089       float estimated_pitch = pitch_height_stats.EstimateYFor(row.height() + row.gap(), 0.1f);
1090       // CJK characters are more likely to be fragmented than poorly
1091       // chopped. So trust the page-level estimation of character
1092       // pitch only if it's larger than row-level estimation or
1093       // row-level estimation is too large (2x bigger than row height).
1094       if (estimated_pitch > row.pitch() || row.pitch() > row.height() * 2.0) {
1095         row.set_estimated_pitch(estimated_pitch);
1096       } else {
1097         row.set_estimated_pitch(row.pitch());
1098       }
1099     }
1100   }
1101 }
1102 
compute_fixed_pitch_cjk(ICOORD page_tr,TO_BLOCK_LIST * port_blocks)1103 void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) {
1104   FPAnalyzer analyzer(page_tr, port_blocks);
1105   if (analyzer.num_rows() == 0) {
1106     return;
1107   }
1108 
1109   analyzer.Pass1Analyze();
1110   analyzer.EstimatePitch(true);
1111 
1112   // Perform pass1 analysis again with the initial estimation of row
1113   // pitches, for better estimation.
1114   analyzer.Pass1Analyze();
1115   analyzer.EstimatePitch(true);
1116 
1117   // Early exit if the page doesn't seem to contain fixed pitch rows.
1118   if (!analyzer.maybe_fixed_pitch()) {
1119     if (textord_debug_pitch_test) {
1120       tprintf("Page doesn't seem to contain fixed pitch rows\n");
1121     }
1122     return;
1123   }
1124 
1125   unsigned iteration = 0;
1126   do {
1127     analyzer.MergeFragments();
1128     analyzer.FinalizeLargeChars();
1129     analyzer.EstimatePitch(false);
1130     iteration++;
1131   } while (analyzer.Pass2Analyze() && iteration < analyzer.max_iteration());
1132 
1133   if (textord_debug_pitch_test) {
1134     tprintf("compute_fixed_pitch_cjk finished after %u iteration (limit=%u)\n", iteration,
1135             analyzer.max_iteration());
1136   }
1137 
1138   analyzer.OutputEstimations();
1139   if (textord_debug_pitch_test) {
1140     analyzer.DebugOutputResult();
1141   }
1142 }
1143 
1144 } // namespace tesseract
1145