1 /**********************************************************************
2 * File: pithsync.cpp (Formerly pitsync2.c)
3 * Description: Code to find the optimum fixed pitch segmentation of some blobs.
4 * Author: Ray Smith
5 *
6 * (C) Copyright 1992, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19 #include "pithsync.h"
20
21 #include "makerow.h"
22 #include "pitsync1.h"
23 #include "topitch.h"
24 #include "tprintf.h"
25
26 #include <cfloat> // for FLT_MAX
27 #include <cmath>
28 #include <vector> // for std::vector
29
30 namespace tesseract {
31
32 /**********************************************************************
33 * FPCUTPT::setup
34 *
35 * Constructor to make a new FPCUTPT.
36 **********************************************************************/
37
setup(FPCUTPT * cutpts,int16_t array_origin,STATS * projection,int16_t zero_count,int16_t pitch,int16_t x,int16_t offset)38 void FPCUTPT::setup( // constructor
39 FPCUTPT *cutpts, // predecessors
40 int16_t array_origin, // start coord
41 STATS *projection, // vertical occupation
42 int16_t zero_count, // official zero
43 int16_t pitch, // proposed pitch
44 int16_t x, // position
45 int16_t offset // dist to gap
46 ) {
47 // half of pitch
48 int16_t half_pitch = pitch / 2 - 1;
49 uint32_t lead_flag; // new flag
50 int32_t ind; // current position
51
52 if (half_pitch > 31) {
53 half_pitch = 31;
54 } else if (half_pitch < 0) {
55 half_pitch = 0;
56 }
57 lead_flag = 1 << half_pitch;
58
59 pred = nullptr;
60 mean_sum = 0;
61 sq_sum = offset * offset;
62 cost = sq_sum;
63 faked = false;
64 terminal = false;
65 fake_count = 0;
66 xpos = x;
67 region_index = 0;
68 mid_cuts = 0;
69 if (x == array_origin) {
70 back_balance = 0;
71 fwd_balance = 0;
72 for (ind = 0; ind <= half_pitch; ind++) {
73 fwd_balance >>= 1;
74 if (projection->pile_count(ind) > zero_count) {
75 fwd_balance |= lead_flag;
76 }
77 }
78 } else {
79 back_balance = cutpts[x - 1 - array_origin].back_balance << 1;
80 back_balance &= lead_flag + (lead_flag - 1);
81 if (projection->pile_count(x) > zero_count) {
82 back_balance |= 1;
83 }
84 fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1;
85 if (projection->pile_count(x + half_pitch) > zero_count) {
86 fwd_balance |= lead_flag;
87 }
88 }
89 }
90
91 /**********************************************************************
92 * FPCUTPT::assign
93 *
94 * Constructor to make a new FPCUTPT.
95 **********************************************************************/
96
assign(FPCUTPT * cutpts,int16_t array_origin,int16_t x,bool faking,bool mid_cut,int16_t offset,STATS * projection,float projection_scale,int16_t zero_count,int16_t pitch,int16_t pitch_error)97 void FPCUTPT::assign( // constructor
98 FPCUTPT *cutpts, // predecessors
99 int16_t array_origin, // start coord
100 int16_t x, // position
101 bool faking, // faking this one
102 bool mid_cut, // cheap cut.
103 int16_t offset, // dist to gap
104 STATS *projection, // vertical occupation
105 float projection_scale, // scaling
106 int16_t zero_count, // official zero
107 int16_t pitch, // proposed pitch
108 int16_t pitch_error // allowed tolerance
109 ) {
110 int index; // test index
111 int balance_index; // for balance factor
112 int16_t balance_count; // ding factor
113 int16_t r_index; // test cut number
114 FPCUTPT *segpt; // segment point
115 int32_t dist; // from prev segment
116 double sq_dist; // squared distance
117 double mean; // mean pitch
118 double total; // total dists
119 double factor; // cost function
120 // half of pitch
121 int16_t half_pitch = pitch / 2 - 1;
122 uint32_t lead_flag; // new flag
123
124 if (half_pitch > 31) {
125 half_pitch = 31;
126 } else if (half_pitch < 0) {
127 half_pitch = 0;
128 }
129 lead_flag = 1 << half_pitch;
130
131 back_balance = cutpts[x - 1 - array_origin].back_balance << 1;
132 back_balance &= lead_flag + (lead_flag - 1);
133 if (projection->pile_count(x) > zero_count) {
134 back_balance |= 1;
135 }
136 fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1;
137 if (projection->pile_count(x + half_pitch) > zero_count) {
138 fwd_balance |= lead_flag;
139 }
140
141 xpos = x;
142 cost = FLT_MAX;
143 pred = nullptr;
144 faked = faking;
145 terminal = false;
146 region_index = 0;
147 fake_count = INT16_MAX;
148 for (index = x - pitch - pitch_error; index <= x - pitch + pitch_error; index++) {
149 if (index >= array_origin) {
150 segpt = &cutpts[index - array_origin];
151 dist = x - segpt->xpos;
152 if (!segpt->terminal && segpt->fake_count < INT16_MAX) {
153 balance_count = 0;
154 if (textord_balance_factor > 0) {
155 if (textord_fast_pitch_test) {
156 lead_flag = back_balance ^ segpt->fwd_balance;
157 balance_count = 0;
158 while (lead_flag != 0) {
159 balance_count++;
160 lead_flag &= lead_flag - 1;
161 }
162 } else {
163 for (balance_index = 0; index + balance_index < x - balance_index; balance_index++) {
164 balance_count += (projection->pile_count(index + balance_index) <= zero_count) ^
165 (projection->pile_count(x - balance_index) <= zero_count);
166 }
167 }
168 balance_count =
169 static_cast<int16_t>(balance_count * textord_balance_factor / projection_scale);
170 }
171 r_index = segpt->region_index + 1;
172 total = segpt->mean_sum + dist;
173 balance_count += offset;
174 sq_dist = dist * dist + segpt->sq_sum + balance_count * balance_count;
175 mean = total / r_index;
176 factor = mean - pitch;
177 factor *= factor;
178 factor += sq_dist / (r_index)-mean * mean;
179 if (factor < cost && segpt->fake_count + faked <= fake_count) {
180 cost = factor; // find least cost
181 pred = segpt; // save path
182 mean_sum = total;
183 sq_sum = sq_dist;
184 fake_count = segpt->fake_count + faked;
185 mid_cuts = segpt->mid_cuts + mid_cut;
186 region_index = r_index;
187 }
188 }
189 }
190 }
191 }
192
193 /**********************************************************************
194 * FPCUTPT::assign_cheap
195 *
196 * Constructor to make a new FPCUTPT on the cheap.
197 **********************************************************************/
198
assign_cheap(FPCUTPT * cutpts,int16_t array_origin,int16_t x,bool faking,bool mid_cut,int16_t offset,STATS * projection,float projection_scale,int16_t zero_count,int16_t pitch,int16_t pitch_error)199 void FPCUTPT::assign_cheap( // constructor
200 FPCUTPT *cutpts, // predecessors
201 int16_t array_origin, // start coord
202 int16_t x, // position
203 bool faking, // faking this one
204 bool mid_cut, // cheap cut.
205 int16_t offset, // dist to gap
206 STATS *projection, // vertical occupation
207 float projection_scale, // scaling
208 int16_t zero_count, // official zero
209 int16_t pitch, // proposed pitch
210 int16_t pitch_error // allowed tolerance
211 ) {
212 int index; // test index
213 int16_t balance_count; // ding factor
214 int16_t r_index; // test cut number
215 FPCUTPT *segpt; // segment point
216 int32_t dist; // from prev segment
217 double sq_dist; // squared distance
218 double mean; // mean pitch
219 double total; // total dists
220 double factor; // cost function
221 // half of pitch
222 int16_t half_pitch = pitch / 2 - 1;
223 uint32_t lead_flag; // new flag
224
225 if (half_pitch > 31) {
226 half_pitch = 31;
227 } else if (half_pitch < 0) {
228 half_pitch = 0;
229 }
230 lead_flag = 1 << half_pitch;
231
232 back_balance = cutpts[x - 1 - array_origin].back_balance << 1;
233 back_balance &= lead_flag + (lead_flag - 1);
234 if (projection->pile_count(x) > zero_count) {
235 back_balance |= 1;
236 }
237 fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1;
238 if (projection->pile_count(x + half_pitch) > zero_count) {
239 fwd_balance |= lead_flag;
240 }
241
242 xpos = x;
243 cost = FLT_MAX;
244 pred = nullptr;
245 faked = faking;
246 terminal = false;
247 region_index = 0;
248 fake_count = INT16_MAX;
249 index = x - pitch;
250 if (index >= array_origin) {
251 segpt = &cutpts[index - array_origin];
252 dist = x - segpt->xpos;
253 if (!segpt->terminal && segpt->fake_count < INT16_MAX) {
254 balance_count = 0;
255 if (textord_balance_factor > 0) {
256 lead_flag = back_balance ^ segpt->fwd_balance;
257 balance_count = 0;
258 while (lead_flag != 0) {
259 balance_count++;
260 lead_flag &= lead_flag - 1;
261 }
262 balance_count =
263 static_cast<int16_t>(balance_count * textord_balance_factor / projection_scale);
264 }
265 r_index = segpt->region_index + 1;
266 total = segpt->mean_sum + dist;
267 balance_count += offset;
268 sq_dist = dist * dist + segpt->sq_sum + balance_count * balance_count;
269 mean = total / r_index;
270 factor = mean - pitch;
271 factor *= factor;
272 factor += sq_dist / (r_index)-mean * mean;
273 cost = factor; // find least cost
274 pred = segpt; // save path
275 mean_sum = total;
276 sq_sum = sq_dist;
277 fake_count = segpt->fake_count + faked;
278 mid_cuts = segpt->mid_cuts + mid_cut;
279 region_index = r_index;
280 }
281 }
282 }
283
284 /**********************************************************************
285 * check_pitch_sync
286 *
287 * Construct the lattice of possible segmentation points and choose the
288 * optimal path. Return the optimal path only.
289 * The return value is a measure of goodness of the sync.
290 **********************************************************************/
291
check_pitch_sync2(BLOBNBOX_IT * blob_it,int16_t blob_count,int16_t pitch,int16_t pitch_error,STATS * projection,int16_t projection_left,int16_t projection_right,float projection_scale,int16_t & occupation_count,FPSEGPT_LIST * seg_list,int16_t start,int16_t end)292 double check_pitch_sync2( // find segmentation
293 BLOBNBOX_IT *blob_it, // blobs to do
294 int16_t blob_count, // no of blobs
295 int16_t pitch, // pitch estimate
296 int16_t pitch_error, // tolerance
297 STATS *projection, // vertical
298 int16_t projection_left, // edges //scale factor
299 int16_t projection_right, float projection_scale,
300 int16_t &occupation_count, // no of occupied cells
301 FPSEGPT_LIST *seg_list, // output list
302 int16_t start, // start of good range
303 int16_t end // end of good range
304 ) {
305 bool faking; // illegal cut pt
306 bool mid_cut; // cheap cut pt.
307 int16_t x; // current coord
308 int16_t blob_index; // blob number
309 int16_t left_edge; // of word
310 int16_t right_edge; // of word
311 int16_t array_origin; // x coord of array
312 int16_t offset; // dist to legal area
313 int16_t zero_count; // projection zero
314 int16_t best_left_x = 0; // for equals
315 int16_t best_right_x = 0; // right edge
316 TBOX this_box; // bounding box
317 TBOX next_box; // box of next blob
318 FPSEGPT *segpt; // segment point
319 double best_cost; // best path
320 double mean_sum; // computes result
321 FPCUTPT *best_end; // end of best path
322 int16_t best_fake; // best fake level
323 int16_t best_count; // no of cuts
324 BLOBNBOX_IT this_it; // copy iterator
325 FPSEGPT_IT seg_it = seg_list; // output iterator
326
327 // tprintf("Computing sync on word of %d blobs with pitch %d\n",
328 // blob_count, pitch);
329 // if (blob_count==8 && pitch==27)
330 // projection->print(stdout,true);
331 zero_count = 0;
332 if (pitch < 3) {
333 pitch = 3; // nothing ludicrous
334 }
335 if ((pitch - 3) / 2 < pitch_error) {
336 pitch_error = (pitch - 3) / 2;
337 }
338 this_it = *blob_it;
339 this_box = box_next(&this_it); // get box
340 // left_edge=this_box.left(); //left of word right_edge=this_box.right();
341 // for (blob_index=1;blob_index<blob_count;blob_index++)
342 // {
343 // this_box=box_next(&this_it);
344 // if (this_box.right()>right_edge)
345 // right_edge=this_box.right();
346 // }
347 for (left_edge = projection_left;
348 projection->pile_count(left_edge) == 0 && left_edge < projection_right; left_edge++) {
349 ;
350 }
351 for (right_edge = projection_right;
352 projection->pile_count(right_edge) == 0 && right_edge > left_edge; right_edge--) {
353 ;
354 }
355 ASSERT_HOST(right_edge >= left_edge);
356 if (pitsync_linear_version >= 4) {
357 return check_pitch_sync3(projection_left, projection_right, zero_count, pitch, pitch_error,
358 projection, projection_scale, occupation_count, seg_list, start, end);
359 }
360 array_origin = left_edge - pitch;
361 // array of points
362 std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
363 for (x = array_origin; x < left_edge; x++) {
364 // free cuts
365 cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x, 0);
366 }
367 for (offset = 0; offset <= pitch_error; offset++, x++) {
368 // not quite free
369 cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x,
370 offset);
371 }
372
373 this_it = *blob_it;
374 best_cost = FLT_MAX;
375 best_end = nullptr;
376 this_box = box_next(&this_it); // first box
377 next_box = box_next(&this_it); // second box
378 blob_index = 1;
379 while (x < right_edge - pitch_error) {
380 if (x > this_box.right() + pitch_error && blob_index < blob_count) {
381 this_box = next_box;
382 next_box = box_next(&this_it);
383 blob_index++;
384 }
385 faking = false;
386 mid_cut = false;
387 if (x <= this_box.left()) {
388 offset = 0;
389 } else if (x <= this_box.left() + pitch_error) {
390 offset = x - this_box.left();
391 } else if (x >= this_box.right()) {
392 offset = 0;
393 } else if (x >= next_box.left() && blob_index < blob_count) {
394 offset = x - next_box.left();
395 if (this_box.right() - x < offset) {
396 offset = this_box.right() - x;
397 }
398 } else if (x >= this_box.right() - pitch_error) {
399 offset = this_box.right() - x;
400 } else if (x - this_box.left() > pitch * pitsync_joined_edge &&
401 this_box.right() - x > pitch * pitsync_joined_edge) {
402 mid_cut = true;
403 offset = 0;
404 } else {
405 faking = true;
406 offset = projection->pile_count(x);
407 }
408 cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, faking, mid_cut, offset,
409 projection, projection_scale, zero_count, pitch, pitch_error);
410 x++;
411 }
412
413 best_fake = INT16_MAX;
414 best_cost = INT32_MAX;
415 best_count = INT16_MAX;
416 while (x < right_edge + pitch) {
417 offset = x < right_edge ? right_edge - x : 0;
418 cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, false, false, offset, projection,
419 projection_scale, zero_count, pitch, pitch_error);
420 cutpts[x - array_origin].terminal = true;
421 if (cutpts[x - array_origin].index() + cutpts[x - array_origin].fake_count <=
422 best_count + best_fake) {
423 if (cutpts[x - array_origin].fake_count < best_fake ||
424 (cutpts[x - array_origin].fake_count == best_fake &&
425 cutpts[x - array_origin].cost_function() < best_cost)) {
426 best_fake = cutpts[x - array_origin].fake_count;
427 best_cost = cutpts[x - array_origin].cost_function();
428 best_left_x = x;
429 best_right_x = x;
430 best_count = cutpts[x - array_origin].index();
431 } else if (cutpts[x - array_origin].fake_count == best_fake && x == best_right_x + 1 &&
432 cutpts[x - array_origin].cost_function() == best_cost) {
433 // exactly equal
434 best_right_x = x;
435 }
436 }
437 x++;
438 }
439 ASSERT_HOST(best_fake < INT16_MAX);
440
441 best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin];
442 if (this_box.right() == textord_test_x && this_box.top() == textord_test_y) {
443 for (x = left_edge - pitch; x < right_edge + pitch; x++) {
444 tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n", x, cutpts[x - array_origin].cost_function(),
445 cutpts[x - array_origin].sum(), cutpts[x - array_origin].squares(),
446 cutpts[x - array_origin].previous()->position());
447 }
448 }
449 occupation_count = -1;
450 do {
451 for (x = best_end->position() - pitch + pitch_error;
452 x < best_end->position() - pitch_error && projection->pile_count(x) == 0; x++) {
453 ;
454 }
455 if (x < best_end->position() - pitch_error) {
456 occupation_count++;
457 }
458 // copy it
459 segpt = new FPSEGPT(best_end);
460 seg_it.add_before_then_move(segpt);
461 best_end = best_end->previous();
462 } while (best_end != nullptr);
463 seg_it.move_to_last();
464 mean_sum = seg_it.data()->sum();
465 mean_sum = mean_sum * mean_sum / best_count;
466 if (seg_it.data()->squares() - mean_sum < 0) {
467 tprintf("Impossible sqsum=%g, mean=%g, total=%d\n", seg_it.data()->squares(),
468 seg_it.data()->sum(), best_count);
469 }
470 // tprintf("blob_count=%d, pitch=%d, sync=%g, occ=%d\n",
471 // blob_count,pitch,seg_it.data()->squares()-mean_sum,
472 // occupation_count);
473 return seg_it.data()->squares() - mean_sum;
474 }
475
476 /**********************************************************************
477 * check_pitch_sync
478 *
479 * Construct the lattice of possible segmentation points and choose the
480 * optimal path. Return the optimal path only.
481 * The return value is a measure of goodness of the sync.
482 **********************************************************************/
483
check_pitch_sync3(int16_t projection_left,int16_t projection_right,int16_t zero_count,int16_t pitch,int16_t pitch_error,STATS * projection,float projection_scale,int16_t & occupation_count,FPSEGPT_LIST * seg_list,int16_t start,int16_t end)484 double check_pitch_sync3( // find segmentation
485 int16_t projection_left, // edges //to be considered 0
486 int16_t projection_right, int16_t zero_count,
487 int16_t pitch, // pitch estimate
488 int16_t pitch_error, // tolerance
489 STATS *projection, // vertical
490 float projection_scale, // scale factor
491 int16_t &occupation_count, // no of occupied cells
492 FPSEGPT_LIST *seg_list, // output list
493 int16_t start, // start of good range
494 int16_t end // end of good range
495 ) {
496 bool faking; // illegal cut pt
497 bool mid_cut; // cheap cut pt.
498 int16_t left_edge; // of word
499 int16_t right_edge; // of word
500 int16_t x; // current coord
501 int16_t array_origin; // x coord of array
502 int16_t offset; // dist to legal area
503 int16_t projection_offset; // from scaled projection
504 int16_t prev_zero; // previous zero dist
505 int16_t next_zero; // next zero dist
506 int16_t zero_offset; // scan window
507 int16_t best_left_x = 0; // for equals
508 int16_t best_right_x = 0; // right edge
509 FPSEGPT *segpt; // segment point
510 int minindex; // next input position
511 int test_index; // index to mins
512 double best_cost; // best path
513 double mean_sum; // computes result
514 FPCUTPT *best_end; // end of best path
515 int16_t best_fake; // best fake level
516 int16_t best_count; // no of cuts
517 FPSEGPT_IT seg_it = seg_list; // output iterator
518
519 end = (end - start) % pitch;
520 if (pitch < 3) {
521 pitch = 3; // nothing ludicrous
522 }
523 if ((pitch - 3) / 2 < pitch_error) {
524 pitch_error = (pitch - 3) / 2;
525 }
526 // min dist of zero
527 zero_offset = static_cast<int16_t>(pitch * pitsync_joined_edge);
528 for (left_edge = projection_left;
529 projection->pile_count(left_edge) == 0 && left_edge < projection_right; left_edge++) {
530 ;
531 }
532 for (right_edge = projection_right;
533 projection->pile_count(right_edge) == 0 && right_edge > left_edge; right_edge--) {
534 ;
535 }
536 array_origin = left_edge - pitch;
537 // array of points
538 std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
539 // local min results
540 std::vector<bool> mins(pitch_error * 2 + 1);
541 for (x = array_origin; x < left_edge; x++) {
542 // free cuts
543 cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x, 0);
544 }
545 prev_zero = left_edge - 1;
546 for (offset = 0; offset <= pitch_error; offset++, x++) {
547 // not quite free
548 cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, zero_count, pitch, x,
549 offset);
550 }
551
552 best_cost = FLT_MAX;
553 best_end = nullptr;
554 for (offset = -pitch_error, minindex = 0; offset < pitch_error; offset++, minindex++) {
555 mins[minindex] = projection->local_min(x + offset);
556 }
557 next_zero = x + zero_offset + 1;
558 for (offset = next_zero - 1; offset >= x; offset--) {
559 if (projection->pile_count(offset) <= zero_count) {
560 next_zero = offset;
561 break;
562 }
563 }
564 while (x < right_edge - pitch_error) {
565 mins[minindex] = projection->local_min(x + pitch_error);
566 minindex++;
567 if (minindex > pitch_error * 2) {
568 minindex = 0;
569 }
570 faking = false;
571 mid_cut = false;
572 offset = 0;
573 if (projection->pile_count(x) <= zero_count) {
574 prev_zero = x;
575 } else {
576 for (offset = 1; offset <= pitch_error; offset++) {
577 if (projection->pile_count(x + offset) <= zero_count ||
578 projection->pile_count(x - offset) <= zero_count) {
579 break;
580 }
581 }
582 }
583 if (offset > pitch_error) {
584 if (x - prev_zero > zero_offset && next_zero - x > zero_offset) {
585 for (offset = 0; offset <= pitch_error; offset++) {
586 test_index = minindex + pitch_error + offset;
587 if (test_index > pitch_error * 2) {
588 test_index -= pitch_error * 2 + 1;
589 }
590 if (mins[test_index]) {
591 break;
592 }
593 test_index = minindex + pitch_error - offset;
594 if (test_index > pitch_error * 2) {
595 test_index -= pitch_error * 2 + 1;
596 }
597 if (mins[test_index]) {
598 break;
599 }
600 }
601 }
602 if (offset > pitch_error) {
603 offset = projection->pile_count(x);
604 faking = true;
605 } else {
606 projection_offset = static_cast<int16_t>(projection->pile_count(x) / projection_scale);
607 if (projection_offset > offset) {
608 offset = projection_offset;
609 }
610 mid_cut = true;
611 }
612 }
613 if ((start == 0 && end == 0) || !textord_fast_pitch_test ||
614 (x - projection_left - start) % pitch <= end) {
615 cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, faking, mid_cut, offset,
616 projection, projection_scale, zero_count, pitch, pitch_error);
617 } else {
618 cutpts[x - array_origin].assign_cheap(&cutpts[0], array_origin, x, faking, mid_cut, offset,
619 projection, projection_scale, zero_count, pitch,
620 pitch_error);
621 }
622 x++;
623 if (next_zero < x || next_zero == x + zero_offset) {
624 next_zero = x + zero_offset + 1;
625 }
626 if (projection->pile_count(x + zero_offset) <= zero_count) {
627 next_zero = x + zero_offset;
628 }
629 }
630
631 best_fake = INT16_MAX;
632 best_cost = INT32_MAX;
633 best_count = INT16_MAX;
634 while (x < right_edge + pitch) {
635 offset = x < right_edge ? right_edge - x : 0;
636 cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, false, false, offset, projection,
637 projection_scale, zero_count, pitch, pitch_error);
638 cutpts[x - array_origin].terminal = true;
639 if (cutpts[x - array_origin].index() + cutpts[x - array_origin].fake_count <=
640 best_count + best_fake) {
641 if (cutpts[x - array_origin].fake_count < best_fake ||
642 (cutpts[x - array_origin].fake_count == best_fake &&
643 cutpts[x - array_origin].cost_function() < best_cost)) {
644 best_fake = cutpts[x - array_origin].fake_count;
645 best_cost = cutpts[x - array_origin].cost_function();
646 best_left_x = x;
647 best_right_x = x;
648 best_count = cutpts[x - array_origin].index();
649 } else if (cutpts[x - array_origin].fake_count == best_fake && x == best_right_x + 1 &&
650 cutpts[x - array_origin].cost_function() == best_cost) {
651 // exactly equal
652 best_right_x = x;
653 }
654 }
655 x++;
656 }
657 ASSERT_HOST(best_fake < INT16_MAX);
658
659 best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin];
660 // for (x=left_edge-pitch;x<right_edge+pitch;x++)
661 // {
662 // tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n",
663 // x,cutpts[x-array_origin].cost_function(),
664 // cutpts[x-array_origin].sum(),
665 // cutpts[x-array_origin].squares(),
666 // cutpts[x-array_origin].previous()->position());
667 // }
668 occupation_count = -1;
669 do {
670 for (x = best_end->position() - pitch + pitch_error;
671 x < best_end->position() - pitch_error && projection->pile_count(x) == 0; x++) {
672 ;
673 }
674 if (x < best_end->position() - pitch_error) {
675 occupation_count++;
676 }
677 // copy it
678 segpt = new FPSEGPT(best_end);
679 seg_it.add_before_then_move(segpt);
680 best_end = best_end->previous();
681 } while (best_end != nullptr);
682 seg_it.move_to_last();
683 mean_sum = seg_it.data()->sum();
684 mean_sum = mean_sum * mean_sum / best_count;
685 if (seg_it.data()->squares() - mean_sum < 0) {
686 tprintf("Impossible sqsum=%g, mean=%g, total=%d\n", seg_it.data()->squares(),
687 seg_it.data()->sum(), best_count);
688 }
689 return seg_it.data()->squares() - mean_sum;
690 }
691
692 } // namespace tesseract
693