1 /******************************************************************************
2  *
3  * File:         findseam.cpp  (Formerly findseam.c)
4  * Author:       Mark Seaman, OCR Technology
5  *
6  * (c) Copyright 1987, Hewlett-Packard Company.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  *****************************************************************************/
18 /*----------------------------------------------------------------------
19               I n c l u d e s
20 ----------------------------------------------------------------------*/
21 #include "findseam.h"
22 #include "outlines.h"
23 #include "plotedges.h"
24 #include "seam.h"
25 #include "wordrec.h"
26 
27 // Include automatically generated configuration file if running autoconf.
28 #ifdef HAVE_CONFIG_H
29 #  include "config_auto.h"
30 #endif
31 
32 /**********************************************************************
33  * partial_split_priority
34  *
35  * Assign a priority to this split based on the features that it has.
36  * Grade it according to the different rating schemes and return the
37  * value of its goodness.
38  **********************************************************************/
39 
40 #define partial_split_priority(split) (grade_split_length(split) + grade_sharpness(split))
41 
42 /*----------------------------------------------------------------------
43               T y p e s
44 ----------------------------------------------------------------------*/
45 #define SPLIT_CLOSENESS 20 /* Difference in x value */
46                            /* How many to keep */
47 #define MAX_NUM_SEAMS 150
48 /* How many to keep */
49 #define NO_FULL_PRIORITY (-1) // Special marker for pri.
50                             /* Evaluate right away */
51 #define BAD_PRIORITY 9999.0
52 
53 /*----------------------------------------------------------------------
54               F u n c t i o n s
55 ----------------------------------------------------------------------*/
56 namespace tesseract {
57 
58 /**********************************************************************
59  * add_seam_to_queue
60  *
61  * Adds the given new_seam to the seams priority queue, unless it is full
62  * and the new seam is worse than the worst.
63  **********************************************************************/
add_seam_to_queue(float new_priority,SEAM * new_seam,SeamQueue * seams)64 void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams) {
65   if (new_seam == nullptr) {
66     return;
67   }
68   if (chop_debug) {
69     tprintf("Pushing new seam with priority %g :", new_priority);
70     new_seam->Print("seam: ");
71   }
72   if (seams->size() >= MAX_NUM_SEAMS) {
73     SeamPair old_pair(0, nullptr);
74     if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) {
75       if (chop_debug) {
76         tprintf("Old seam staying with priority %g\n", old_pair.key());
77       }
78       delete new_seam;
79       seams->Push(&old_pair);
80       return;
81     } else if (chop_debug) {
82       tprintf("New seam with priority %g beats old worst seam with %g\n", new_priority,
83               old_pair.key());
84     }
85   }
86   SeamPair new_pair(new_priority, new_seam);
87   seams->Push(&new_pair);
88 }
89 
90 /**********************************************************************
91  * choose_best_seam
92  *
93  * Choose the best seam that can be created by assembling this a
94  * collection of splits.  A queue of all the possible seams is
95  * maintained.  Each new split received is placed in that queue with
96  * its partial priority value.  These values in the seam queue are
97  * evaluated and combined until a good enough seam is found.  If no
98  * further good seams are being found then this function returns to the
99  * caller, who will send more splits.  If this function is called with
100  * a split of nullptr, then no further splits can be supplied by the
101  * caller.
102  **********************************************************************/
choose_best_seam(SeamQueue * seam_queue,const SPLIT * split,PRIORITY priority,SEAM ** seam_result,TBLOB * blob,SeamPile * seam_pile)103 void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority,
104                                SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile) {
105   SEAM *seam;
106   char str[80];
107   float my_priority;
108   /* Add seam of split */
109   my_priority = priority;
110   if (split != nullptr) {
111     TPOINT split_point = split->point1->pos;
112     split_point += split->point2->pos;
113     split_point /= 2;
114     seam = new SEAM(my_priority, split_point, *split);
115     if (chop_debug > 1) {
116       seam->Print("Partial priority    ");
117     }
118     add_seam_to_queue(my_priority, seam, seam_queue);
119 
120     if (my_priority > chop_good_split) {
121       return;
122     }
123   }
124 
125   TBOX bbox = blob->bounding_box();
126   /* Queue loop */
127   while (!seam_queue->empty()) {
128     SeamPair seam_pair;
129     seam_queue->Pop(&seam_pair);
130     seam = seam_pair.extract_data();
131     /* Set full priority */
132     my_priority =
133         seam->FullPriority(bbox.left(), bbox.right(), chop_overlap_knob, chop_centered_maxwidth,
134                            chop_center_knob, chop_width_change_knob);
135     if (chop_debug) {
136       sprintf(str, "Full my_priority %0.0f,  ", my_priority);
137       seam->Print(str);
138     }
139 
140     if ((*seam_result == nullptr || (*seam_result)->priority() > my_priority) &&
141         my_priority < chop_ok_split) {
142       /* No crossing */
143       if (seam->IsHealthy(*blob, chop_min_outline_points, chop_min_outline_area)) {
144         delete *seam_result;
145         *seam_result = new SEAM(*seam);
146         (*seam_result)->set_priority(my_priority);
147       } else {
148         delete seam;
149         seam = nullptr;
150         my_priority = BAD_PRIORITY;
151       }
152     }
153 
154     if (my_priority < chop_good_split) {
155       delete seam;
156       return; /* Made good answer */
157     }
158 
159     if (seam) {
160       /* Combine with others */
161       if (seam_pile->size() < chop_seam_pile_size) {
162         combine_seam(*seam_pile, seam, seam_queue);
163         SeamDecPair pair(seam_pair.key(), seam);
164         seam_pile->Push(&pair);
165       } else if (chop_new_seam_pile && seam_pile->size() == chop_seam_pile_size &&
166                  seam_pile->PeekTop().key() > seam_pair.key()) {
167         combine_seam(*seam_pile, seam, seam_queue);
168         SeamDecPair pair;
169         seam_pile->Pop(&pair); // pop the worst.
170         // Replace the seam in pair (deleting the old one) with
171         // the new seam and score, then push back into the heap.
172         pair.set_key(seam_pair.key());
173         pair.set_data(seam);
174         seam_pile->Push(&pair);
175       } else {
176         delete seam;
177       }
178     }
179 
180     my_priority = seam_queue->empty() ? NO_FULL_PRIORITY : seam_queue->PeekTop().key();
181     if ((my_priority > chop_ok_split) || (my_priority > chop_good_split && split)) {
182       return;
183     }
184   }
185 }
186 
187 /**********************************************************************
188  * combine_seam
189  *
190  * Find other seams to combine with this one.  The new seams that result
191  * from this union should be added to the seam queue.  The return value
192  * tells whether or not any additional seams were added to the queue.
193  **********************************************************************/
combine_seam(const SeamPile & seam_pile,const SEAM * seam,SeamQueue * seam_queue)194 void Wordrec::combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue) {
195   for (int x = 0; x < seam_pile.size(); ++x) {
196     const SEAM *this_one = seam_pile.get(x).data();
197     if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) {
198       SEAM *new_one = new SEAM(*seam);
199       new_one->CombineWith(*this_one);
200       if (chop_debug > 1) {
201         new_one->Print("Combo priority       ");
202       }
203       add_seam_to_queue(new_one->priority(), new_one, seam_queue);
204     }
205   }
206 }
207 
208 /**********************************************************************
209  * pick_good_seam
210  *
211  * Find and return a good seam that will split this blob into two pieces.
212  * Work from the outlines provided.
213  **********************************************************************/
pick_good_seam(TBLOB * blob)214 SEAM *Wordrec::pick_good_seam(TBLOB *blob) {
215   SeamPile seam_pile(chop_seam_pile_size);
216   EDGEPT *points[MAX_NUM_POINTS];
217   EDGEPT_CLIST new_points;
218   SEAM *seam = nullptr;
219   TESSLINE *outline;
220   int16_t num_points = 0;
221 
222 #ifndef GRAPHICS_DISABLED
223   if (chop_debug > 2) {
224     wordrec_display_splits.set_value(true);
225   }
226 
227   draw_blob_edges(blob);
228 #endif
229 
230   PointHeap point_heap(MAX_NUM_POINTS);
231   for (outline = blob->outlines; outline; outline = outline->next) {
232     prioritize_points(outline, &point_heap);
233   }
234 
235   while (!point_heap.empty() && num_points < MAX_NUM_POINTS) {
236     points[num_points++] = point_heap.PeekTop().data();
237     point_heap.Pop(nullptr);
238   }
239 
240   /* Initialize queue */
241   SeamQueue seam_queue(MAX_NUM_SEAMS);
242 
243   try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob);
244   try_vertical_splits(points, num_points, &new_points, &seam_queue, &seam_pile, &seam, blob);
245 
246   if (seam == nullptr) {
247     choose_best_seam(&seam_queue, nullptr, BAD_PRIORITY, &seam, blob, &seam_pile);
248   } else if (seam->priority() > chop_good_split) {
249     choose_best_seam(&seam_queue, nullptr, seam->priority(), &seam, blob, &seam_pile);
250   }
251 
252   EDGEPT_C_IT it(&new_points);
253   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
254     EDGEPT *inserted_point = it.data();
255     if (seam == nullptr || !seam->UsesPoint(inserted_point)) {
256       for (outline = blob->outlines; outline; outline = outline->next) {
257         if (outline->loop == inserted_point) {
258           outline->loop = outline->loop->next;
259         }
260       }
261       remove_edgept(inserted_point);
262     }
263   }
264 
265   if (seam) {
266     if (seam->priority() > chop_ok_split) {
267       delete seam;
268       seam = nullptr;
269     }
270 #ifndef GRAPHICS_DISABLED
271     else if (wordrec_display_splits) {
272       seam->Mark(edge_window);
273       if (chop_debug > 2) {
274         edge_window->Update();
275         edge_window->Wait();
276       }
277     }
278 #endif
279   }
280 
281   if (chop_debug) {
282     wordrec_display_splits.set_value(false);
283   }
284 
285   return (seam);
286 }
287 
288 /**********************************************************************
289  * try_point_pairs
290  *
291  * Try all the splits that are produced by pairing critical points
292  * together.  See if any of them are suitable for use.  Use a seam
293  * queue and seam pile that have already been initialized and used.
294  **********************************************************************/
try_point_pairs(EDGEPT * points[MAX_NUM_POINTS],int16_t num_points,SeamQueue * seam_queue,SeamPile * seam_pile,SEAM ** seam,TBLOB * blob)295 void Wordrec::try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points,
296                               SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam,
297                               TBLOB *blob) {
298   int16_t x;
299   int16_t y;
300   PRIORITY priority;
301 
302   for (x = 0; x < num_points; x++) {
303     for (y = x + 1; y < num_points; y++) {
304       if (points[y] &&
305           points[x]->WeightedDistance(*points[y], chop_x_y_weight) < chop_split_length &&
306           points[x] != points[y]->next && points[y] != points[x]->next &&
307           !is_exterior_point(points[x], points[y]) && !is_exterior_point(points[y], points[x])) {
308         SPLIT split(points[x], points[y]);
309         priority = partial_split_priority(&split);
310 
311         choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
312       }
313     }
314   }
315 }
316 
317 /**********************************************************************
318  * try_vertical_splits
319  *
320  * Try all the splits that are produced by vertical projection to see
321  * if any of them are suitable for use.  Use a seam queue and seam pile
322  * that have already been initialized and used.
323  * Return in new_points a collection of points that were inserted into
324  * the blob while examining vertical splits and which may safely be
325  * removed once a seam is chosen if they are not part of the seam.
326  **********************************************************************/
try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS],int16_t num_points,EDGEPT_CLIST * new_points,SeamQueue * seam_queue,SeamPile * seam_pile,SEAM ** seam,TBLOB * blob)327 void Wordrec::try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points,
328                                   EDGEPT_CLIST *new_points, SeamQueue *seam_queue,
329                                   SeamPile *seam_pile, SEAM **seam, TBLOB *blob) {
330   EDGEPT *vertical_point = nullptr;
331   int16_t x;
332   PRIORITY priority;
333   TESSLINE *outline;
334 
335   for (x = 0; x < num_points; x++) {
336     vertical_point = nullptr;
337     for (outline = blob->outlines; outline; outline = outline->next) {
338       vertical_projection_point(points[x], outline->loop, &vertical_point, new_points);
339     }
340 
341     if (vertical_point && points[x] != vertical_point->next && vertical_point != points[x]->next &&
342         points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) < chop_split_length) {
343       SPLIT split(points[x], vertical_point);
344       priority = partial_split_priority(&split);
345       choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
346     }
347   }
348 }
349 
350 } // namespace tesseract
351