1 #if __FreeBSD__ >= 10
2 #include "/usr/local/include/leptonica/allheaders.h"
3 #include "/usr/local/include/tesseract/baseapi.h"
4 #else
5 #include <leptonica/allheaders.h>
6 #include <tesseract/baseapi.h>
7 #endif
8 
9 #include <stdio.h>
10 #include <unistd.h>
11 #include "tessbridge.h"
12 
Create()13 TessBaseAPI Create() {
14     tesseract::TessBaseAPI* api = new tesseract::TessBaseAPI();
15     return (void*)api;
16 }
17 
Free(TessBaseAPI a)18 void Free(TessBaseAPI a) {
19     tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
20     if (api != nullptr) {
21         api->End();
22         delete api;
23     }
24 }
25 
Clear(TessBaseAPI a)26 void Clear(TessBaseAPI a) {
27     tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
28     if (api != nullptr) {
29         api->Clear();
30     }
31 }
32 
ClearPersistentCache(TessBaseAPI a)33 void ClearPersistentCache(TessBaseAPI a) {
34     tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
35     api->ClearPersistentCache();
36 }
37 
Init(TessBaseAPI a,char * tessdataprefix,char * languages)38 int Init(TessBaseAPI a, char* tessdataprefix, char* languages) {
39     tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
40     return api->Init(tessdataprefix, languages);
41 }
42 
Init(TessBaseAPI a,char * tessdataprefix,char * languages,char * configfilepath,char * errbuf)43 int Init(TessBaseAPI a, char* tessdataprefix, char* languages, char* configfilepath, char* errbuf) {
44     tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
45 
46     // {{{ Redirect STDERR to given buffer
47     fflush(stderr);
48     int original_stderr;
49     original_stderr = dup(STDERR_FILENO);
50     (void)freopen("/dev/null", "a", stderr);
51     setbuf(stderr, errbuf);
52     // }}}
53 
54     int ret;
55     if (configfilepath != NULL) {
56         char* configs[] = {configfilepath};
57         int configs_size = 1;
58         ret = api->Init(tessdataprefix, languages, tesseract::OEM_DEFAULT, configs, configs_size, NULL, NULL, false);
59     } else {
60         ret = api->Init(tessdataprefix, languages);
61     }
62 
63     // {{{ Restore default stderr
64     (void)freopen("/dev/null", "a", stderr);
65     dup2(original_stderr, STDERR_FILENO);
66     close(original_stderr);
67     setbuf(stderr, NULL);
68     // }}}
69 
70     return ret;
71 }
72 
SetVariable(TessBaseAPI a,char * name,char * value)73 bool SetVariable(TessBaseAPI a, char* name, char* value) {
74     tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
75     return api->SetVariable(name, value);
76 }
77 
SetPixImage(TessBaseAPI a,PixImage pix)78 void SetPixImage(TessBaseAPI a, PixImage pix) {
79     tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
80     Pix* image = (Pix*)pix;
81     api->SetImage(image);
82     if (api->GetSourceYResolution() < 70) {
83         api->SetSourceResolution(70);
84     }
85 }
86 
SetPageSegMode(TessBaseAPI a,int m)87 void SetPageSegMode(TessBaseAPI a, int m) {
88     tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
89     tesseract::PageSegMode mode = (tesseract::PageSegMode)m;
90     api->SetPageSegMode(mode);
91 }
92 
GetPageSegMode(TessBaseAPI a)93 int GetPageSegMode(TessBaseAPI a) {
94     tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
95     return api->GetPageSegMode();
96 }
97 
UTF8Text(TessBaseAPI a)98 char* UTF8Text(TessBaseAPI a) {
99     tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
100     return api->GetUTF8Text();
101 }
102 
HOCRText(TessBaseAPI a)103 char* HOCRText(TessBaseAPI a) {
104     tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
105     return api->GetHOCRText(0);
106 }
107 
GetBoundingBoxesVerbose(TessBaseAPI a)108 bounding_boxes* GetBoundingBoxesVerbose(TessBaseAPI a) {
109     using namespace tesseract;
110     tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
111     struct bounding_boxes* box_array;
112     box_array = (bounding_boxes*)malloc(sizeof(bounding_boxes));
113     // linearly resize boxes array
114     int realloc_threshold = 900;
115     int realloc_raise = 1000;
116     int capacity = 1000;
117     box_array->boxes = (bounding_box*)malloc(capacity * sizeof(bounding_box));
118     box_array->length = 0;
119     api->Recognize(NULL);
120     int block_num = 0;
121     int par_num = 0;
122     int line_num = 0;
123     int word_num = 0;
124 
125     ResultIterator* res_it = api->GetIterator();
126     while (!res_it->Empty(RIL_BLOCK)) {
127         if (res_it->Empty(RIL_WORD)) {
128             res_it->Next(RIL_WORD);
129             continue;
130         }
131         // Add rows for any new block/paragraph/textline.
132         if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
133             block_num++;
134             par_num = 0;
135             line_num = 0;
136             word_num = 0;
137         }
138         if (res_it->IsAtBeginningOf(RIL_PARA)) {
139             par_num++;
140             line_num = 0;
141             word_num = 0;
142         }
143         if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
144             line_num++;
145             word_num = 0;
146         }
147         word_num++;
148 
149         if (box_array->length >= realloc_threshold) {
150             capacity += realloc_raise;
151             box_array->boxes = (bounding_box*)realloc(box_array->boxes, capacity * sizeof(bounding_box));
152             realloc_threshold += realloc_raise;
153         }
154 
155         box_array->boxes[box_array->length].word = res_it->GetUTF8Text(RIL_WORD);
156         box_array->boxes[box_array->length].confidence = res_it->Confidence(RIL_WORD);
157         res_it->BoundingBox(RIL_WORD, &box_array->boxes[box_array->length].x1, &box_array->boxes[box_array->length].y1,
158                             &box_array->boxes[box_array->length].x2, &box_array->boxes[box_array->length].y2);
159 
160         // block, para, line, word numbers
161         box_array->boxes[box_array->length].block_num = block_num;
162         box_array->boxes[box_array->length].par_num = par_num;
163         box_array->boxes[box_array->length].line_num = line_num;
164         box_array->boxes[box_array->length].word_num = word_num;
165 
166         box_array->length++;
167         res_it->Next(RIL_WORD);
168     }
169 
170     return box_array;
171 }
172 
GetBoundingBoxes(TessBaseAPI a,int pageIteratorLevel)173 bounding_boxes* GetBoundingBoxes(TessBaseAPI a, int pageIteratorLevel) {
174     tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
175     struct bounding_boxes* box_array;
176     box_array = (bounding_boxes*)malloc(sizeof(bounding_boxes));
177     // linearly resize boxes array
178     int realloc_threshold = 900;
179     int realloc_raise = 1000;
180     int capacity = 1000;
181     box_array->boxes = (bounding_box*)malloc(capacity * sizeof(bounding_box));
182     box_array->length = 0;
183     api->Recognize(NULL);
184     tesseract::ResultIterator* ri = api->GetIterator();
185     tesseract::PageIteratorLevel level = (tesseract::PageIteratorLevel)pageIteratorLevel;
186 
187     if (ri != 0) {
188         do {
189             if (box_array->length >= realloc_threshold) {
190                 capacity += realloc_raise;
191                 box_array->boxes = (bounding_box*)realloc(box_array->boxes, capacity * sizeof(bounding_box));
192                 realloc_threshold += realloc_raise;
193             }
194             box_array->boxes[box_array->length].word = ri->GetUTF8Text(level);
195             box_array->boxes[box_array->length].confidence = ri->Confidence(level);
196             ri->BoundingBox(level, &box_array->boxes[box_array->length].x1, &box_array->boxes[box_array->length].y1,
197                             &box_array->boxes[box_array->length].x2, &box_array->boxes[box_array->length].y2);
198             box_array->length++;
199         } while (ri->Next(level));
200     }
201 
202     return box_array;
203 }
204 
Version(TessBaseAPI a)205 const char* Version(TessBaseAPI a) {
206     tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
207     const char* v = api->Version();
208     return v;
209 }
210 
CreatePixImageByFilePath(char * imagepath)211 PixImage CreatePixImageByFilePath(char* imagepath) {
212     Pix* image = pixRead(imagepath);
213     return (void*)image;
214 }
215 
CreatePixImageFromBytes(unsigned char * data,int size)216 PixImage CreatePixImageFromBytes(unsigned char* data, int size) {
217     Pix* image = pixReadMem(data, (size_t)size);
218     return (void*)image;
219 }
220 
DestroyPixImage(PixImage pix)221 void DestroyPixImage(PixImage pix) {
222     Pix* img = (Pix*)pix;
223     pixDestroy(&img);
224 }
225 
GetDataPath()226 const char* GetDataPath() {
227     static tesseract::TessBaseAPI api;
228     api.Init(nullptr, nullptr);
229     return api.GetDatapath();
230 }
231