#if __FreeBSD__ >= 10
#include "/usr/local/include/leptonica/allheaders.h"
#include "/usr/local/include/tesseract/baseapi.h"
#else
#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>
#endif

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "tessbridge.h"
12
Create()13 TessBaseAPI Create() {
14 tesseract::TessBaseAPI* api = new tesseract::TessBaseAPI();
15 return (void*)api;
16 }
17
Free(TessBaseAPI a)18 void Free(TessBaseAPI a) {
19 tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
20 if (api != nullptr) {
21 api->End();
22 delete api;
23 }
24 }
25
Clear(TessBaseAPI a)26 void Clear(TessBaseAPI a) {
27 tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
28 if (api != nullptr) {
29 api->Clear();
30 }
31 }
32
ClearPersistentCache(TessBaseAPI a)33 void ClearPersistentCache(TessBaseAPI a) {
34 tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
35 api->ClearPersistentCache();
36 }
37
Init(TessBaseAPI a,char * tessdataprefix,char * languages)38 int Init(TessBaseAPI a, char* tessdataprefix, char* languages) {
39 tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
40 return api->Init(tessdataprefix, languages);
41 }
42
Init(TessBaseAPI a,char * tessdataprefix,char * languages,char * configfilepath,char * errbuf)43 int Init(TessBaseAPI a, char* tessdataprefix, char* languages, char* configfilepath, char* errbuf) {
44 tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
45
46 // {{{ Redirect STDERR to given buffer
47 fflush(stderr);
48 int original_stderr;
49 original_stderr = dup(STDERR_FILENO);
50 (void)freopen("/dev/null", "a", stderr);
51 setbuf(stderr, errbuf);
52 // }}}
53
54 int ret;
55 if (configfilepath != NULL) {
56 char* configs[] = {configfilepath};
57 int configs_size = 1;
58 ret = api->Init(tessdataprefix, languages, tesseract::OEM_DEFAULT, configs, configs_size, NULL, NULL, false);
59 } else {
60 ret = api->Init(tessdataprefix, languages);
61 }
62
63 // {{{ Restore default stderr
64 (void)freopen("/dev/null", "a", stderr);
65 dup2(original_stderr, STDERR_FILENO);
66 close(original_stderr);
67 setbuf(stderr, NULL);
68 // }}}
69
70 return ret;
71 }
72
SetVariable(TessBaseAPI a,char * name,char * value)73 bool SetVariable(TessBaseAPI a, char* name, char* value) {
74 tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
75 return api->SetVariable(name, value);
76 }
77
SetPixImage(TessBaseAPI a,PixImage pix)78 void SetPixImage(TessBaseAPI a, PixImage pix) {
79 tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
80 Pix* image = (Pix*)pix;
81 api->SetImage(image);
82 if (api->GetSourceYResolution() < 70) {
83 api->SetSourceResolution(70);
84 }
85 }
86
SetPageSegMode(TessBaseAPI a,int m)87 void SetPageSegMode(TessBaseAPI a, int m) {
88 tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
89 tesseract::PageSegMode mode = (tesseract::PageSegMode)m;
90 api->SetPageSegMode(mode);
91 }
92
GetPageSegMode(TessBaseAPI a)93 int GetPageSegMode(TessBaseAPI a) {
94 tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
95 return api->GetPageSegMode();
96 }
97
UTF8Text(TessBaseAPI a)98 char* UTF8Text(TessBaseAPI a) {
99 tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
100 return api->GetUTF8Text();
101 }
102
HOCRText(TessBaseAPI a)103 char* HOCRText(TessBaseAPI a) {
104 tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
105 return api->GetHOCRText(0);
106 }
107
GetBoundingBoxesVerbose(TessBaseAPI a)108 bounding_boxes* GetBoundingBoxesVerbose(TessBaseAPI a) {
109 using namespace tesseract;
110 tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
111 struct bounding_boxes* box_array;
112 box_array = (bounding_boxes*)malloc(sizeof(bounding_boxes));
113 // linearly resize boxes array
114 int realloc_threshold = 900;
115 int realloc_raise = 1000;
116 int capacity = 1000;
117 box_array->boxes = (bounding_box*)malloc(capacity * sizeof(bounding_box));
118 box_array->length = 0;
119 api->Recognize(NULL);
120 int block_num = 0;
121 int par_num = 0;
122 int line_num = 0;
123 int word_num = 0;
124
125 ResultIterator* res_it = api->GetIterator();
126 while (!res_it->Empty(RIL_BLOCK)) {
127 if (res_it->Empty(RIL_WORD)) {
128 res_it->Next(RIL_WORD);
129 continue;
130 }
131 // Add rows for any new block/paragraph/textline.
132 if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
133 block_num++;
134 par_num = 0;
135 line_num = 0;
136 word_num = 0;
137 }
138 if (res_it->IsAtBeginningOf(RIL_PARA)) {
139 par_num++;
140 line_num = 0;
141 word_num = 0;
142 }
143 if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
144 line_num++;
145 word_num = 0;
146 }
147 word_num++;
148
149 if (box_array->length >= realloc_threshold) {
150 capacity += realloc_raise;
151 box_array->boxes = (bounding_box*)realloc(box_array->boxes, capacity * sizeof(bounding_box));
152 realloc_threshold += realloc_raise;
153 }
154
155 box_array->boxes[box_array->length].word = res_it->GetUTF8Text(RIL_WORD);
156 box_array->boxes[box_array->length].confidence = res_it->Confidence(RIL_WORD);
157 res_it->BoundingBox(RIL_WORD, &box_array->boxes[box_array->length].x1, &box_array->boxes[box_array->length].y1,
158 &box_array->boxes[box_array->length].x2, &box_array->boxes[box_array->length].y2);
159
160 // block, para, line, word numbers
161 box_array->boxes[box_array->length].block_num = block_num;
162 box_array->boxes[box_array->length].par_num = par_num;
163 box_array->boxes[box_array->length].line_num = line_num;
164 box_array->boxes[box_array->length].word_num = word_num;
165
166 box_array->length++;
167 res_it->Next(RIL_WORD);
168 }
169
170 return box_array;
171 }
172
GetBoundingBoxes(TessBaseAPI a,int pageIteratorLevel)173 bounding_boxes* GetBoundingBoxes(TessBaseAPI a, int pageIteratorLevel) {
174 tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
175 struct bounding_boxes* box_array;
176 box_array = (bounding_boxes*)malloc(sizeof(bounding_boxes));
177 // linearly resize boxes array
178 int realloc_threshold = 900;
179 int realloc_raise = 1000;
180 int capacity = 1000;
181 box_array->boxes = (bounding_box*)malloc(capacity * sizeof(bounding_box));
182 box_array->length = 0;
183 api->Recognize(NULL);
184 tesseract::ResultIterator* ri = api->GetIterator();
185 tesseract::PageIteratorLevel level = (tesseract::PageIteratorLevel)pageIteratorLevel;
186
187 if (ri != 0) {
188 do {
189 if (box_array->length >= realloc_threshold) {
190 capacity += realloc_raise;
191 box_array->boxes = (bounding_box*)realloc(box_array->boxes, capacity * sizeof(bounding_box));
192 realloc_threshold += realloc_raise;
193 }
194 box_array->boxes[box_array->length].word = ri->GetUTF8Text(level);
195 box_array->boxes[box_array->length].confidence = ri->Confidence(level);
196 ri->BoundingBox(level, &box_array->boxes[box_array->length].x1, &box_array->boxes[box_array->length].y1,
197 &box_array->boxes[box_array->length].x2, &box_array->boxes[box_array->length].y2);
198 box_array->length++;
199 } while (ri->Next(level));
200 }
201
202 return box_array;
203 }
204
Version(TessBaseAPI a)205 const char* Version(TessBaseAPI a) {
206 tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
207 const char* v = api->Version();
208 return v;
209 }
210
CreatePixImageByFilePath(char * imagepath)211 PixImage CreatePixImageByFilePath(char* imagepath) {
212 Pix* image = pixRead(imagepath);
213 return (void*)image;
214 }
215
CreatePixImageFromBytes(unsigned char * data,int size)216 PixImage CreatePixImageFromBytes(unsigned char* data, int size) {
217 Pix* image = pixReadMem(data, (size_t)size);
218 return (void*)image;
219 }
220
DestroyPixImage(PixImage pix)221 void DestroyPixImage(PixImage pix) {
222 Pix* img = (Pix*)pix;
223 pixDestroy(&img);
224 }
225
GetDataPath()226 const char* GetDataPath() {
227 static tesseract::TessBaseAPI api;
228 api.Init(nullptr, nullptr);
229 return api.GetDatapath();
230 }
231