1 /**********************************************************************
2  * File:        pithsync.cpp  (Formerly pitsync2.c)
3  * Description: Code to find the optimum fixed pitch segmentation of some blobs.
4  * Author:      Ray Smith
5  *
6  * (C) Copyright 1992, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #include "pithsync.h"
20 
21 #include "makerow.h"
22 #include "pitsync1.h"
23 #include "topitch.h"
24 #include "tprintf.h"
25 
26 #include <cfloat> // for FLT_MAX
27 #include <cmath>
28 #include <vector> // for std::vector
29 
30 namespace tesseract {
31 
32 /**********************************************************************
33  * FPCUTPT::setup
34  *
35  * Constructor to make a new FPCUTPT.
36  **********************************************************************/
37 
setup(FPCUTPT * cutpts,int16_t array_origin,STATS * projection,int16_t zero_count,int16_t pitch,int16_t x,int16_t offset)38 void FPCUTPT::setup(      // constructor
39     FPCUTPT *cutpts,      // predecessors
40     int16_t array_origin, // start coord
41     STATS *projection,    // vertical occupation
42     int16_t zero_count,   // official zero
43     int16_t pitch,        // proposed pitch
44     int16_t x,            // position
45     int16_t offset        // dist to gap
46 ) {
47   // half of pitch
48   int16_t half_pitch = pitch / 2 - 1;
49   uint32_t lead_flag; // new flag
50   int32_t ind;        // current position
51 
52   if (half_pitch > 31) {
53     half_pitch = 31;
54   } else if (half_pitch < 0) {
55     half_pitch = 0;
56   }
57   lead_flag = 1 << half_pitch;
58 
59   pred = nullptr;
60   mean_sum = 0;
61   sq_sum = offset * offset;
62   cost = sq_sum;
63   faked = false;
64   terminal = false;
65   fake_count = 0;
66   xpos = x;
67   region_index = 0;
68   mid_cuts = 0;
69   if (x == array_origin) {
70     back_balance = 0;
71     fwd_balance = 0;
72     for (ind = 0; ind <= half_pitch; ind++) {
73       fwd_balance >>= 1;
74       if (projection->pile_count(ind) > zero_count) {
75         fwd_balance |= lead_flag;
76       }
77     }
78   } else {
79     back_balance = cutpts[x - 1 - array_origin].back_balance << 1;
80     back_balance &= lead_flag + (lead_flag - 1);
81     if (projection->pile_count(x) > zero_count) {
82       back_balance |= 1;
83     }
84     fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1;
85     if (projection->pile_count(x + half_pitch) > zero_count) {
86       fwd_balance |= lead_flag;
87     }
88   }
89 }
90 
91 /**********************************************************************
92  * FPCUTPT::assign
93  *
94  * Constructor to make a new FPCUTPT.
95  **********************************************************************/
96 
assign(FPCUTPT * cutpts,int16_t array_origin,int16_t x,bool faking,bool mid_cut,int16_t offset,STATS * projection,float projection_scale,int16_t zero_count,int16_t pitch,int16_t pitch_error)97 void FPCUTPT::assign(       // constructor
98     FPCUTPT *cutpts,        // predecessors
99     int16_t array_origin,   // start coord
100     int16_t x,              // position
101     bool faking,            // faking this one
102     bool mid_cut,           // cheap cut.
103     int16_t offset,         // dist to gap
104     STATS *projection,      // vertical occupation
105     float projection_scale, // scaling
106     int16_t zero_count,     // official zero
107     int16_t pitch,          // proposed pitch
108     int16_t pitch_error     // allowed tolerance
109 ) {
110   int index;             // test index
111   int balance_index;     // for balance factor
112   int16_t balance_count; // ding factor
113   int16_t r_index;       // test cut number
114   FPCUTPT *segpt;        // segment point
115   int32_t dist;          // from prev segment
116   double sq_dist;        // squared distance
117   double mean;           // mean pitch
118   double total;          // total dists
119   double factor;         // cost function
120                          // half of pitch
121   int16_t half_pitch = pitch / 2 - 1;
122   uint32_t lead_flag; // new flag
123 
124   if (half_pitch > 31) {
125     half_pitch = 31;
126   } else if (half_pitch < 0) {
127     half_pitch = 0;
128   }
129   lead_flag = 1 << half_pitch;
130 
131   back_balance = cutpts[x - 1 - array_origin].back_balance << 1;
132   back_balance &= lead_flag + (lead_flag - 1);
133   if (projection->pile_count(x) > zero_count) {
134     back_balance |= 1;
135   }
136   fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1;
137   if (projection->pile_count(x + half_pitch) > zero_count) {
138     fwd_balance |= lead_flag;
139   }
140 
141   xpos = x;
142   cost = FLT_MAX;
143   pred = nullptr;
144   faked = faking;
145   terminal = false;
146   region_index = 0;
147   fake_count = INT16_MAX;
148   for (index = x - pitch - pitch_error; index <= x - pitch + pitch_error; index++) {
149     if (index >= array_origin) {
150       segpt = &cutpts[index - array_origin];
151       dist = x - segpt->xpos;
152       if (!segpt->terminal && segpt->fake_count < INT16_MAX) {
153         balance_count = 0;
154         if (textord_balance_factor > 0) {
155           if (textord_fast_pitch_test) {
156             lead_flag = back_balance ^ segpt->fwd_balance;
157             balance_count = 0;
158             while (lead_flag != 0) {
159               balance_count++;
160               lead_flag &= lead_flag - 1;
161             }
162           } else {
163             for (balance_index = 0; index + balance_index < x - balance_index; balance_index++) {
164               balance_count += (projection->pile_count(index + balance_index) <= zero_count) ^
165                                (projection->pile_count(x - balance_index) <= zero_count);
166             }
167           }
168           balance_count =
169               static_cast<int16_t>(balance_count * textord_balance_factor / projection_scale);
170         }
171         r_index = segpt->region_index + 1;
172         total = segpt->mean_sum + dist;
173         balance_count += offset;
174         sq_dist = dist * dist + segpt->sq_sum + balance_count * balance_count;
175         mean = total / r_index;
176         factor = mean - pitch;
177         factor *= factor;
178         factor += sq_dist / (r_index)-mean * mean;
179         if (factor < cost && segpt->fake_count + faked <= fake_count) {
180           cost = factor; // find least cost
181           pred = segpt;  // save path
182           mean_sum = total;
183           sq_sum = sq_dist;
184           fake_count = segpt->fake_count + faked;
185           mid_cuts = segpt->mid_cuts + mid_cut;
186           region_index = r_index;
187         }
188       }
189     }
190   }
191 }
192 
193 /**********************************************************************
194  * FPCUTPT::assign_cheap
195  *
196  * Constructor to make a new FPCUTPT on the cheap.
197  **********************************************************************/
198 
assign_cheap(FPCUTPT * cutpts,int16_t array_origin,int16_t x,bool faking,bool mid_cut,int16_t offset,STATS * projection,float projection_scale,int16_t zero_count,int16_t pitch,int16_t pitch_error)199 void FPCUTPT::assign_cheap( // constructor
200     FPCUTPT *cutpts,        // predecessors
201     int16_t array_origin,   // start coord
202     int16_t x,              // position
203     bool faking,            // faking this one
204     bool mid_cut,           // cheap cut.
205     int16_t offset,         // dist to gap
206     STATS *projection,      // vertical occupation
207     float projection_scale, // scaling
208     int16_t zero_count,     // official zero
209     int16_t pitch,          // proposed pitch
210     int16_t pitch_error     // allowed tolerance
211 ) {
212   int index;             // test index
213   int16_t balance_count; // ding factor
214   int16_t r_index;       // test cut number
215   FPCUTPT *segpt;        // segment point
216   int32_t dist;          // from prev segment
217   double sq_dist;        // squared distance
218   double mean;           // mean pitch
219   double total;          // total dists
220   double factor;         // cost function
221                          // half of pitch
222   int16_t half_pitch = pitch / 2 - 1;
223   uint32_t lead_flag; // new flag
224 
225   if (half_pitch > 31) {
226     half_pitch = 31;
227   } else if (half_pitch < 0) {
228     half_pitch = 0;
229   }
230   lead_flag = 1 << half_pitch;
231 
232   back_balance = cutpts[x - 1 - array_origin].back_balance << 1;
233   back_balance &= lead_flag + (lead_flag - 1);
234   if (projection->pile_count(x) > zero_count) {
235     back_balance |= 1;
236   }
237   fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1;
238   if (projection->pile_count(x + half_pitch) > zero_count) {
239     fwd_balance |= lead_flag;
240   }
241 
242   xpos = x;
243   cost = FLT_MAX;
244   pred = nullptr;
245   faked = faking;
246   terminal = false;
247   region_index = 0;
248   fake_count = INT16_MAX;
249   index = x - pitch;
250   if (index >= array_origin) {
251     segpt = &cutpts[index - array_origin];
252     dist = x - segpt->xpos;
253     if (!segpt->terminal && segpt->fake_count < INT16_MAX) {
254       balance_count = 0;
255       if (textord_balance_factor > 0) {
256         lead_flag = back_balance ^ segpt->fwd_balance;
257         balance_count = 0;
258         while (lead_flag != 0) {
259           balance_count++;
260           lead_flag &= lead_flag - 1;
261         }
262         balance_count =
263             static_cast<int16_t>(balance_count * textord_balance_factor / projection_scale);
264       }
265       r_index = segpt->region_index + 1;
266       total = segpt->mean_sum + dist;
267       balance_count += offset;
268       sq_dist = dist * dist + segpt->sq_sum + balance_count * balance_count;
269       mean = total / r_index;
270       factor = mean - pitch;
271       factor *= factor;
272       factor += sq_dist / (r_index)-mean * mean;
273       cost = factor; // find least cost
274       pred = segpt;  // save path
275       mean_sum = total;
276       sq_sum = sq_dist;
277       fake_count = segpt->fake_count + faked;
278       mid_cuts = segpt->mid_cuts + mid_cut;
279       region_index = r_index;
280     }
281   }
282 }
283 
284 /**********************************************************************
285  * check_pitch_sync
286  *
287  * Construct the lattice of possible segmentation points and choose the
288  * optimal path. Return the optimal path only.
289  * The return value is a measure of goodness of the sync.
290  **********************************************************************/
291 
check_pitch_sync2(BLOBNBOX_IT * blob_it,int16_t blob_count,int16_t pitch,int16_t pitch_error,STATS * projection,int16_t projection_left,int16_t projection_right,float projection_scale,int16_t & occupation_count,FPSEGPT_LIST * seg_list,int16_t start,int16_t end)292 double check_pitch_sync2(    // find segmentation
293     BLOBNBOX_IT *blob_it,    // blobs to do
294     int16_t blob_count,      // no of blobs
295     int16_t pitch,           // pitch estimate
296     int16_t pitch_error,     // tolerance
297     STATS *projection,       // vertical
298     int16_t projection_left, // edges //scale factor
299     int16_t projection_right, float projection_scale,
300     int16_t &occupation_count, // no of occupied cells
301     FPSEGPT_LIST *seg_list,    // output list
302     int16_t start,             // start of good range
303     int16_t end                // end of good range
304 ) {
305   bool faking;                  // illegal cut pt
306   bool mid_cut;                 // cheap cut pt.
307   int16_t x;                    // current coord
308   int16_t blob_index;           // blob number
309   int16_t left_edge;            // of word
310   int16_t right_edge;           // of word
311   int16_t array_origin;         // x coord of array
312   int16_t offset;               // dist to legal area
313   int16_t zero_count;           // projection zero
314   int16_t best_left_x = 0;      // for equals
315   int16_t best_right_x = 0;     // right edge
316   TBOX this_box;                // bounding box
317   TBOX next_box;                // box of next blob
318   FPSEGPT *segpt;               // segment point
319   double best_cost;             // best path
320   double mean_sum;              // computes result
321   FPCUTPT *best_end;            // end of best path
322   int16_t best_fake;            // best fake level
323   int16_t best_count;           // no of cuts
324   BLOBNBOX_IT this_it;          // copy iterator
325   FPSEGPT_IT seg_it = seg_list; // output iterator
326 
327   //      tprintf("Computing sync on word of %d blobs with pitch %d\n",
328   //              blob_count, pitch);
329   //      if (blob_count==8 && pitch==27)
330   //              projection->print(stdout,true);
331   zero_count = 0;
332   if (pitch < 3) {
333     pitch = 3; // nothing ludicrous
334   }
335   if ((pitch - 3) / 2 < pitch_error) {
336     pitch_error = (pitch - 3) / 2;
337   }
338   this_it = *blob_it;
339   this_box = box_next(&this_it); // get box
340   //      left_edge=this_box.left(); //left of word right_edge=this_box.right();
341   //      for (blob_index=1;blob_index<blob_count;blob_index++)
342   //      {
343   //              this_box=box_next(&this_it);
344   //              if (this_box.right()>right_edge)
345   //                      right_edge=this_box.right();
346   //      }
347   for (left_edge = projection_left;
348        projection->pile_count(left_edge) == 0 && left_edge < projection_right; left_edge++) {
349     ;
350   }
351   for (right_edge = projection_right;
352        projection->pile_count(right_edge) == 0 && right_edge > left_edge; right_edge--) {
353     ;
354   }
355   ASSERT_HOST(right_edge >= left_edge);
356   if (pitsync_linear_version >= 4) {
357     return check_pitch_sync3(projection_left, projection_right, zero_count, pitch, pitch_error,
358                              projection, projection_scale, occupation_count, seg_list, start, end);
359   }
360   array_origin = left_edge - pitch;
361   // array of points
362   std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
363   for (x = array_origin; x < left_edge; x++) {
364     // free cuts
365     cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x, 0);
366   }
367   for (offset = 0; offset <= pitch_error; offset++, x++) {
368     // not quite free
369     cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x,
370                                    offset);
371   }
372 
373   this_it = *blob_it;
374   best_cost = FLT_MAX;
375   best_end = nullptr;
376   this_box = box_next(&this_it); // first box
377   next_box = box_next(&this_it); // second box
378   blob_index = 1;
379   while (x < right_edge - pitch_error) {
380     if (x > this_box.right() + pitch_error && blob_index < blob_count) {
381       this_box = next_box;
382       next_box = box_next(&this_it);
383       blob_index++;
384     }
385     faking = false;
386     mid_cut = false;
387     if (x <= this_box.left()) {
388       offset = 0;
389     } else if (x <= this_box.left() + pitch_error) {
390       offset = x - this_box.left();
391     } else if (x >= this_box.right()) {
392       offset = 0;
393     } else if (x >= next_box.left() && blob_index < blob_count) {
394       offset = x - next_box.left();
395       if (this_box.right() - x < offset) {
396         offset = this_box.right() - x;
397       }
398     } else if (x >= this_box.right() - pitch_error) {
399       offset = this_box.right() - x;
400     } else if (x - this_box.left() > pitch * pitsync_joined_edge &&
401                this_box.right() - x > pitch * pitsync_joined_edge) {
402       mid_cut = true;
403       offset = 0;
404     } else {
405       faking = true;
406       offset = projection->pile_count(x);
407     }
408     cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, faking, mid_cut, offset,
409                                     projection, projection_scale, zero_count, pitch, pitch_error);
410     x++;
411   }
412 
413   best_fake = INT16_MAX;
414   best_cost = INT32_MAX;
415   best_count = INT16_MAX;
416   while (x < right_edge + pitch) {
417     offset = x < right_edge ? right_edge - x : 0;
418     cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, false, false, offset, projection,
419                                     projection_scale, zero_count, pitch, pitch_error);
420     cutpts[x - array_origin].terminal = true;
421     if (cutpts[x - array_origin].index() + cutpts[x - array_origin].fake_count <=
422         best_count + best_fake) {
423       if (cutpts[x - array_origin].fake_count < best_fake ||
424           (cutpts[x - array_origin].fake_count == best_fake &&
425            cutpts[x - array_origin].cost_function() < best_cost)) {
426         best_fake = cutpts[x - array_origin].fake_count;
427         best_cost = cutpts[x - array_origin].cost_function();
428         best_left_x = x;
429         best_right_x = x;
430         best_count = cutpts[x - array_origin].index();
431       } else if (cutpts[x - array_origin].fake_count == best_fake && x == best_right_x + 1 &&
432                  cutpts[x - array_origin].cost_function() == best_cost) {
433         // exactly equal
434         best_right_x = x;
435       }
436     }
437     x++;
438   }
439   ASSERT_HOST(best_fake < INT16_MAX);
440 
441   best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin];
442   if (this_box.right() == textord_test_x && this_box.top() == textord_test_y) {
443     for (x = left_edge - pitch; x < right_edge + pitch; x++) {
444       tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n", x, cutpts[x - array_origin].cost_function(),
445               cutpts[x - array_origin].sum(), cutpts[x - array_origin].squares(),
446               cutpts[x - array_origin].previous()->position());
447     }
448   }
449   occupation_count = -1;
450   do {
451     for (x = best_end->position() - pitch + pitch_error;
452          x < best_end->position() - pitch_error && projection->pile_count(x) == 0; x++) {
453       ;
454     }
455     if (x < best_end->position() - pitch_error) {
456       occupation_count++;
457     }
458     // copy it
459     segpt = new FPSEGPT(best_end);
460     seg_it.add_before_then_move(segpt);
461     best_end = best_end->previous();
462   } while (best_end != nullptr);
463   seg_it.move_to_last();
464   mean_sum = seg_it.data()->sum();
465   mean_sum = mean_sum * mean_sum / best_count;
466   if (seg_it.data()->squares() - mean_sum < 0) {
467     tprintf("Impossible sqsum=%g, mean=%g, total=%d\n", seg_it.data()->squares(),
468             seg_it.data()->sum(), best_count);
469   }
470   //      tprintf("blob_count=%d, pitch=%d, sync=%g, occ=%d\n",
471   //              blob_count,pitch,seg_it.data()->squares()-mean_sum,
472   //              occupation_count);
473   return seg_it.data()->squares() - mean_sum;
474 }
475 
476 /**********************************************************************
477  * check_pitch_sync
478  *
479  * Construct the lattice of possible segmentation points and choose the
480  * optimal path. Return the optimal path only.
481  * The return value is a measure of goodness of the sync.
482  **********************************************************************/
483 
check_pitch_sync3(int16_t projection_left,int16_t projection_right,int16_t zero_count,int16_t pitch,int16_t pitch_error,STATS * projection,float projection_scale,int16_t & occupation_count,FPSEGPT_LIST * seg_list,int16_t start,int16_t end)484 double check_pitch_sync3(    // find segmentation
485     int16_t projection_left, // edges //to be considered 0
486     int16_t projection_right, int16_t zero_count,
487     int16_t pitch,             // pitch estimate
488     int16_t pitch_error,       // tolerance
489     STATS *projection,         // vertical
490     float projection_scale,    // scale factor
491     int16_t &occupation_count, // no of occupied cells
492     FPSEGPT_LIST *seg_list,    // output list
493     int16_t start,             // start of good range
494     int16_t end                // end of good range
495 ) {
496   bool faking;                  // illegal cut pt
497   bool mid_cut;                 // cheap cut pt.
498   int16_t left_edge;            // of word
499   int16_t right_edge;           // of word
500   int16_t x;                    // current coord
501   int16_t array_origin;         // x coord of array
502   int16_t offset;               // dist to legal area
503   int16_t projection_offset;    // from scaled projection
504   int16_t prev_zero;            // previous zero dist
505   int16_t next_zero;            // next zero dist
506   int16_t zero_offset;          // scan window
507   int16_t best_left_x = 0;      // for equals
508   int16_t best_right_x = 0;     // right edge
509   FPSEGPT *segpt;               // segment point
510   int minindex;                 // next input position
511   int test_index;               // index to mins
512   double best_cost;             // best path
513   double mean_sum;              // computes result
514   FPCUTPT *best_end;            // end of best path
515   int16_t best_fake;            // best fake level
516   int16_t best_count;           // no of cuts
517   FPSEGPT_IT seg_it = seg_list; // output iterator
518 
519   end = (end - start) % pitch;
520   if (pitch < 3) {
521     pitch = 3; // nothing ludicrous
522   }
523   if ((pitch - 3) / 2 < pitch_error) {
524     pitch_error = (pitch - 3) / 2;
525   }
526   // min dist of zero
527   zero_offset = static_cast<int16_t>(pitch * pitsync_joined_edge);
528   for (left_edge = projection_left;
529        projection->pile_count(left_edge) == 0 && left_edge < projection_right; left_edge++) {
530     ;
531   }
532   for (right_edge = projection_right;
533        projection->pile_count(right_edge) == 0 && right_edge > left_edge; right_edge--) {
534     ;
535   }
536   array_origin = left_edge - pitch;
537   // array of points
538   std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
539   // local min results
540   std::vector<bool> mins(pitch_error * 2 + 1);
541   for (x = array_origin; x < left_edge; x++) {
542     // free cuts
543     cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x, 0);
544   }
545   prev_zero = left_edge - 1;
546   for (offset = 0; offset <= pitch_error; offset++, x++) {
547     // not quite free
548     cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x,
549                                    offset);
550   }
551 
552   best_cost = FLT_MAX;
553   best_end = nullptr;
554   for (offset = -pitch_error, minindex = 0; offset < pitch_error; offset++, minindex++) {
555     mins[minindex] = projection->local_min(x + offset);
556   }
557   next_zero = x + zero_offset + 1;
558   for (offset = next_zero - 1; offset >= x; offset--) {
559     if (projection->pile_count(offset) <= zero_count) {
560       next_zero = offset;
561       break;
562     }
563   }
564   while (x < right_edge - pitch_error) {
565     mins[minindex] = projection->local_min(x + pitch_error);
566     minindex++;
567     if (minindex > pitch_error * 2) {
568       minindex = 0;
569     }
570     faking = false;
571     mid_cut = false;
572     offset = 0;
573     if (projection->pile_count(x) <= zero_count) {
574       prev_zero = x;
575     } else {
576       for (offset = 1; offset <= pitch_error; offset++) {
577         if (projection->pile_count(x + offset) <= zero_count ||
578             projection->pile_count(x - offset) <= zero_count) {
579           break;
580         }
581       }
582     }
583     if (offset > pitch_error) {
584       if (x - prev_zero > zero_offset && next_zero - x > zero_offset) {
585         for (offset = 0; offset <= pitch_error; offset++) {
586           test_index = minindex + pitch_error + offset;
587           if (test_index > pitch_error * 2) {
588             test_index -= pitch_error * 2 + 1;
589           }
590           if (mins[test_index]) {
591             break;
592           }
593           test_index = minindex + pitch_error - offset;
594           if (test_index > pitch_error * 2) {
595             test_index -= pitch_error * 2 + 1;
596           }
597           if (mins[test_index]) {
598             break;
599           }
600         }
601       }
602       if (offset > pitch_error) {
603         offset = projection->pile_count(x);
604         faking = true;
605       } else {
606         projection_offset = static_cast<int16_t>(projection->pile_count(x) / projection_scale);
607         if (projection_offset > offset) {
608           offset = projection_offset;
609         }
610         mid_cut = true;
611       }
612     }
613     if ((start == 0 && end == 0) || !textord_fast_pitch_test ||
614         (x - projection_left - start) % pitch <= end) {
615       cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, faking, mid_cut, offset,
616                                       projection, projection_scale, zero_count, pitch, pitch_error);
617     } else {
618       cutpts[x - array_origin].assign_cheap(&cutpts[0], array_origin, x, faking, mid_cut, offset,
619                                             projection, projection_scale, zero_count, pitch,
620                                             pitch_error);
621     }
622     x++;
623     if (next_zero < x || next_zero == x + zero_offset) {
624       next_zero = x + zero_offset + 1;
625     }
626     if (projection->pile_count(x + zero_offset) <= zero_count) {
627       next_zero = x + zero_offset;
628     }
629   }
630 
631   best_fake = INT16_MAX;
632   best_cost = INT32_MAX;
633   best_count = INT16_MAX;
634   while (x < right_edge + pitch) {
635     offset = x < right_edge ? right_edge - x : 0;
636     cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, false, false, offset, projection,
637                                     projection_scale, zero_count, pitch, pitch_error);
638     cutpts[x - array_origin].terminal = true;
639     if (cutpts[x - array_origin].index() + cutpts[x - array_origin].fake_count <=
640         best_count + best_fake) {
641       if (cutpts[x - array_origin].fake_count < best_fake ||
642           (cutpts[x - array_origin].fake_count == best_fake &&
643            cutpts[x - array_origin].cost_function() < best_cost)) {
644         best_fake = cutpts[x - array_origin].fake_count;
645         best_cost = cutpts[x - array_origin].cost_function();
646         best_left_x = x;
647         best_right_x = x;
648         best_count = cutpts[x - array_origin].index();
649       } else if (cutpts[x - array_origin].fake_count == best_fake && x == best_right_x + 1 &&
650                  cutpts[x - array_origin].cost_function() == best_cost) {
651         // exactly equal
652         best_right_x = x;
653       }
654     }
655     x++;
656   }
657   ASSERT_HOST(best_fake < INT16_MAX);
658 
659   best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin];
660   //      for (x=left_edge-pitch;x<right_edge+pitch;x++)
661   //      {
662   //              tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n",
663   //                      x,cutpts[x-array_origin].cost_function(),
664   //                      cutpts[x-array_origin].sum(),
665   //                      cutpts[x-array_origin].squares(),
666   //                      cutpts[x-array_origin].previous()->position());
667   //      }
668   occupation_count = -1;
669   do {
670     for (x = best_end->position() - pitch + pitch_error;
671          x < best_end->position() - pitch_error && projection->pile_count(x) == 0; x++) {
672       ;
673     }
674     if (x < best_end->position() - pitch_error) {
675       occupation_count++;
676     }
677     // copy it
678     segpt = new FPSEGPT(best_end);
679     seg_it.add_before_then_move(segpt);
680     best_end = best_end->previous();
681   } while (best_end != nullptr);
682   seg_it.move_to_last();
683   mean_sum = seg_it.data()->sum();
684   mean_sum = mean_sum * mean_sum / best_count;
685   if (seg_it.data()->squares() - mean_sum < 0) {
686     tprintf("Impossible sqsum=%g, mean=%g, total=%d\n", seg_it.data()->squares(),
687             seg_it.data()->sum(), best_count);
688   }
689   return seg_it.data()->squares() - mean_sum;
690 }
691 
692 } // namespace tesseract
693