1 /*
2 * Tesseract-based OCR filter
3 *
4 * Copyright (c) 2014, Martin Herkt <lachs0r@srsfckn.biz>
5 *
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
11 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
12 * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
13 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
14 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
15 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
16 * PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <ctype.h>
20 #include <math.h>
21 #include <stdio.h>
22 #include <string.h>
23
24 #include <tesseract/capi.h>
25
26 #include "VapourSynth.h"
27 #include "VSHelper.h"
28
29 typedef struct OCRData {
30 VSNodeRef *node;
31 VSVideoInfo vi;
32
33 VSMap *options;
34 char *datapath;
35 char *language;
36 } OCRData;
37
OCRInit(VSMap * in,VSMap * out,void ** instanceData,VSNode * node,VSCore * core,const VSAPI * vsapi)38 static void VS_CC OCRInit(VSMap *in, VSMap *out, void **instanceData,
39 VSNode *node, VSCore *core, const VSAPI *vsapi)
40 {
41 OCRData *d = (OCRData *) * instanceData;
42 vsapi->setVideoInfo(&d->vi, 1, node);
43 }
44
OCRFree(void * instanceData,VSCore * core,const VSAPI * vsapi)45 static void VS_CC OCRFree(void *instanceData, VSCore *core,
46 const VSAPI *vsapi)
47 {
48 OCRData *d = (OCRData *)instanceData;
49
50 vsapi->freeNode(d->node);
51 vsapi->freeMap(d->options);
52 free(d->datapath);
53 free(d->language);
54 free(d);
55 }
56
OCRGetFrame(int n,int activationReason,void ** instanceData,void ** frameData,VSFrameContext * frameCtx,VSCore * core,const VSAPI * vsapi)57 static const VSFrameRef *VS_CC OCRGetFrame(int n, int activationReason,
58 void **instanceData,
59 void **frameData,
60 VSFrameContext *frameCtx,
61 VSCore *core,
62 const VSAPI *vsapi)
63 {
64 OCRData *d = (OCRData *) * instanceData;
65
66 if (activationReason == arInitial) {
67 vsapi->requestFrameFilter(n, d->node, frameCtx);
68 } else if (activationReason == arAllFramesReady) {
69 const VSFrameRef *src = vsapi->getFrameFilter(n, d->node, frameCtx);
70 VSFrameRef *dst = vsapi->copyFrame(src, core);
71 VSMap *m = vsapi->getFramePropsRW(dst);
72
73 const uint8_t *srcp = vsapi->getReadPtr(src, 0);
74 int width = vsapi->getFrameWidth(src, 0);
75 int height = vsapi->getFrameHeight(src, 0);
76 int stride = vsapi->getStride(src, 0);
77
78 TessBaseAPI *api = TessBaseAPICreate();
79 if (TessBaseAPIInit3(api, d->datapath, d->language) == -1) {
80 vsapi->setFilterError("Failed to initialize Tesseract", frameCtx);
81
82 TessBaseAPIDelete(api);
83 vsapi->freeFrame(src);
84 vsapi->freeFrame(dst);
85
86 return 0;
87 }
88
89 if (d->options) {
90 int i, err;
91 int nopts = vsapi->propNumElements(d->options, "options");
92
93 for (i = 0; i < nopts; i += 2) {
94 const char *key = vsapi->propGetData(d->options, "options",
95 i, &err);
96 const char *value = vsapi->propGetData(d->options, "options",
97 i + 1, &err);
98
99 if (!TessBaseAPISetVariable(api, key, value)) {
100 char msg[200];
101
102 snprintf(msg, 200,
103 "Failed to set Tesseract option '%s'", key);
104
105 vsapi->setFilterError(msg, frameCtx);
106
107 TessBaseAPIEnd(api);
108 TessBaseAPIDelete(api);
109 vsapi->freeFrame(src);
110 vsapi->freeFrame(dst);
111
112 return 0;
113 }
114 }
115 }
116
117 {
118 unsigned i;
119
120 char *result = TessBaseAPIRect(api, srcp, 1,
121 stride, 0, 0, width, height);
122 int *confs = TessBaseAPIAllWordConfidences(api);
123 int length = strlen(result);
124
125 for (; length > 0 && isspace(result[length - 1]); length--);
126 vsapi->propSetData(m, "OCRString", result, length, paReplace);
127
128 for (i = 0; confs[i] != -1; i++) {
129 vsapi->propSetInt(m, "OCRConfidence", confs[i], paAppend);
130 }
131
132 free(confs);
133 free(result);
134 }
135
136 TessBaseAPIEnd(api);
137 TessBaseAPIDelete(api);
138 vsapi->freeFrame(src);
139
140 return dst;
141 }
142
143 return 0;
144 }
145
/* Tesseract API functions such as SetVariable need zero-terminated strings.
   This returns a malloc'ed copy of the first `size` bytes of `data` with a
   terminating NUL appended, so callers never depend on the source buffer
   being terminated. Returns NULL when `size` is not positive or when the
   allocation fails. */
static char *szterm(const char *data, int size) {
    char *copy;

    if (size <= 0)
        return NULL;

    copy = malloc(size + 1);

    if (copy) {
        memcpy(copy, data, size);
        copy[size] = '\0';
    }

    return copy;
}
163
OCRCreate(const VSMap * in,VSMap * out,void * userData,VSCore * core,const VSAPI * vsapi)164 static void VS_CC OCRCreate(const VSMap *in, VSMap *out, void *userData,
165 VSCore *core, const VSAPI *vsapi)
166 {
167 OCRData d, *data;
168 const char *msg;
169 int err, nopts;
170
171 int size;
172 const char *opt;
173
174 d.node = vsapi->propGetNode(in, "clip", 0, 0);
175 d.vi = *vsapi->getVideoInfo(d.node);
176 d.options = NULL;
177 d.datapath = NULL;
178 d.language = NULL;
179
180 if (!d.vi.format) {
181 msg = "Only constant format input supported";
182 goto error;
183 }
184
185 if (d.vi.format->sampleType != stInteger ||
186 d.vi.format->bytesPerSample != 1 ||
187 d.vi.format->colorFamily != cmGray) {
188
189 msg = "Only grayscale 8-bit int formats supported";
190 goto error;
191 }
192
193 if ((nopts = vsapi->propNumElements(in, "options")) > 0) {
194 if (nopts % 2) {
195 msg = "Options must be key,value pairs";
196 goto error;
197 } else {
198 int i;
199
200 d.options = vsapi->createMap();
201
202 for (i = 0; i < nopts; i++) {
203 char *tmp;
204
205 opt = vsapi->propGetData(in, "options", i, &err);
206 size = vsapi->propGetDataSize(in, "options", i, &err);
207
208 if (err) {
209 msg = "Failed to read an option";
210 goto error;
211 }
212
213 if (size == 0) {
214 msg = "Options and their values must have non-zero length";
215 goto error;
216 }
217
218 tmp = szterm(opt, size);
219
220 if (!tmp) {
221 msg = "Failed to allocate memory for option";
222 goto error;
223 }
224
225 vsapi->propSetData(d.options, "options",
226 tmp, size + 1, paAppend);
227
228 free(tmp);
229 }
230 }
231 }
232
233 opt = vsapi->propGetData(in, "datapath", 0, &err);
234 size = vsapi->propGetDataSize(in, "datapath", 0, &err);
235
236 if (!err) {
237 d.datapath = szterm(opt, size);
238 }
239
240 opt = vsapi->propGetData(in, "language", 0, &err);
241 size = vsapi->propGetDataSize(in, "language", 0, &err);
242
243 if (!err) {
244 d.language = szterm(opt, size);
245 #ifdef _WIN32
246 } else {
247 VSPlugin *ocr_plugin = vsapi->getPluginById("biz.srsfckn.ocr", core);
248 const char *plugin_path = vsapi->getPluginPath(ocr_plugin);
249 char *last_slash = strrchr(plugin_path, '/');
250 d.datapath = szterm(plugin_path, last_slash - plugin_path + 1);
251 #endif
252 }
253
254 data = malloc(sizeof(d));
255 *data = d;
256
257 vsapi->createFilter(in, out, "OCR", OCRInit,
258 OCRGetFrame, OCRFree, fmParallel, 0, data, core);
259
260 return;
261
262 error:
263 vsapi->freeNode(d.node);
264 vsapi->freeMap(d.options);
265 free(d.datapath);
266 free(d.language);
267 vsapi->setError(out, msg);
268 }
269
270 VS_EXTERNAL_API(void) VapourSynthPluginInit(VSConfigPlugin configFunc,
271 VSRegisterFunction registerFunc,
272 VSPlugin *plugin);
273
VapourSynthPluginInit(VSConfigPlugin configFunc,VSRegisterFunction registerFunc,VSPlugin * plugin)274 VS_EXTERNAL_API(void) VapourSynthPluginInit(VSConfigPlugin configFunc,
275 VSRegisterFunction registerFunc,
276 VSPlugin *plugin)
277 {
278 configFunc("biz.srsfckn.ocr", "ocr", "Tesseract OCR Filter",
279 VAPOURSYNTH_API_VERSION, 1, plugin);
280
281 registerFunc("Recognize",
282 "clip:clip;datapath:data:opt;language:data:opt;options:data[]:opt",
283 OCRCreate, 0, plugin);
284 }
285