1 /**********************************************************************
2  * File:        werdit.cpp  (Formerly wordit.c)
3  * Description: An iterator for passing over all the words in a document.
4  * Author:      Ray Smith
5  * Created:     Mon Apr 27 08:51:22 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "werdit.h"
21 
22 #include "errcode.h"  // for ASSERT_HOST
23 #include "pageres.h"  // for PAGE_RES_IT, PAGE_RES (ptr only), WERD_RES
24 #include "stepblob.h" // for C_BLOB_IT, C_BLOB, C_BLOB_LIST
25 #include "werd.h"     // for WERD
26 
27 namespace tesseract {
28 
29 /**********************************************************************
30  * make_pseudo_word
31  *
32  * Make all the blobs inside a selection into a single word.
33  * The returned PAGE_RES_IT* it points to the new word. After use, call
34  * it->DeleteCurrentWord() to delete the fake word, and then
35  * delete it to get rid of the iterator itself.
36  **********************************************************************/
37 
make_pseudo_word(PAGE_RES * page_res,const TBOX & selection_box)38 PAGE_RES_IT *make_pseudo_word(PAGE_RES *page_res, const TBOX &selection_box) {
39   PAGE_RES_IT pr_it(page_res);
40   C_BLOB_LIST new_blobs;              // list of gathered blobs
41   C_BLOB_IT new_blob_it = &new_blobs; // iterator
42 
43   for (WERD_RES *word_res = pr_it.word(); word_res != nullptr; word_res = pr_it.forward()) {
44     WERD *word = word_res->word;
45     if (word->bounding_box().overlap(selection_box)) {
46       C_BLOB_IT blob_it(word->cblob_list());
47       for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
48         C_BLOB *blob = blob_it.data();
49         if (blob->bounding_box().overlap(selection_box)) {
50           new_blob_it.add_after_then_move(C_BLOB::deep_copy(blob));
51         }
52       }
53       if (!new_blobs.empty()) {
54         WERD *pseudo_word = new WERD(&new_blobs, 1, nullptr);
55         word_res = pr_it.InsertSimpleCloneWord(*word_res, pseudo_word);
56         auto *it = new PAGE_RES_IT(page_res);
57         while (it->word() != word_res && it->word() != nullptr) {
58           it->forward();
59         }
60         ASSERT_HOST(it->word() == word_res);
61         return it;
62       }
63     }
64   }
65   return nullptr;
66 }
67 
68 } // namespace tesseract
69