1 /*
2  * Tesseract-based OCR filter
3  *
4  * Copyright (c) 2014, Martin Herkt <lachs0r@srsfckn.biz>
5  *
6  * Permission to use, copy, modify, and/or distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
11  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
12  * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
13  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
14  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
15  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
16  * PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <ctype.h>
20 #include <math.h>
21 #include <stdio.h>
22 #include <string.h>
23 
24 #include <tesseract/capi.h>
25 
26 #include "VapourSynth.h"
27 #include "VSHelper.h"
28 
29 typedef struct OCRData {
30     VSNodeRef *node;
31     VSVideoInfo vi;
32 
33     VSMap *options;
34     char *datapath;
35     char *language;
36 } OCRData;
37 
OCRInit(VSMap * in,VSMap * out,void ** instanceData,VSNode * node,VSCore * core,const VSAPI * vsapi)38 static void VS_CC OCRInit(VSMap *in, VSMap *out, void **instanceData,
39                              VSNode *node, VSCore *core, const VSAPI *vsapi)
40 {
41     OCRData *d = (OCRData *) * instanceData;
42     vsapi->setVideoInfo(&d->vi, 1, node);
43 }
44 
OCRFree(void * instanceData,VSCore * core,const VSAPI * vsapi)45 static void VS_CC OCRFree(void *instanceData, VSCore *core,
46                              const VSAPI *vsapi)
47 {
48     OCRData *d = (OCRData *)instanceData;
49 
50     vsapi->freeNode(d->node);
51     vsapi->freeMap(d->options);
52     free(d->datapath);
53     free(d->language);
54     free(d);
55 }
56 
OCRGetFrame(int n,int activationReason,void ** instanceData,void ** frameData,VSFrameContext * frameCtx,VSCore * core,const VSAPI * vsapi)57 static const VSFrameRef *VS_CC OCRGetFrame(int n, int activationReason,
58                                            void **instanceData,
59                                            void **frameData,
60                                            VSFrameContext *frameCtx,
61                                            VSCore *core,
62                                            const VSAPI *vsapi)
63 {
64     OCRData *d = (OCRData *) * instanceData;
65 
66     if (activationReason == arInitial) {
67         vsapi->requestFrameFilter(n, d->node, frameCtx);
68     } else if (activationReason == arAllFramesReady) {
69         const VSFrameRef *src = vsapi->getFrameFilter(n, d->node, frameCtx);
70         VSFrameRef *dst = vsapi->copyFrame(src, core);
71         VSMap *m = vsapi->getFramePropsRW(dst);
72 
73         const uint8_t *srcp = vsapi->getReadPtr(src, 0);
74         int width = vsapi->getFrameWidth(src, 0);
75         int height = vsapi->getFrameHeight(src, 0);
76         int stride = vsapi->getStride(src, 0);
77 
78         TessBaseAPI *api = TessBaseAPICreate();
79         if (TessBaseAPIInit3(api, d->datapath, d->language) == -1) {
80             vsapi->setFilterError("Failed to initialize Tesseract", frameCtx);
81 
82             TessBaseAPIDelete(api);
83             vsapi->freeFrame(src);
84             vsapi->freeFrame(dst);
85 
86             return 0;
87         }
88 
89         if (d->options) {
90             int i, err;
91             int nopts = vsapi->propNumElements(d->options, "options");
92 
93             for (i = 0; i < nopts; i += 2) {
94                 const char *key = vsapi->propGetData(d->options, "options",
95                                                      i, &err);
96                 const char *value = vsapi->propGetData(d->options, "options",
97                                                        i + 1, &err);
98 
99                 if (!TessBaseAPISetVariable(api, key, value)) {
100                     char msg[200];
101 
102                     snprintf(msg, 200,
103                              "Failed to set Tesseract option '%s'", key);
104 
105                     vsapi->setFilterError(msg, frameCtx);
106 
107                     TessBaseAPIEnd(api);
108                     TessBaseAPIDelete(api);
109                     vsapi->freeFrame(src);
110                     vsapi->freeFrame(dst);
111 
112                     return 0;
113                 }
114             }
115         }
116 
117         {
118             unsigned i;
119 
120             char *result = TessBaseAPIRect(api, srcp, 1,
121                                            stride, 0, 0, width, height);
122             int *confs = TessBaseAPIAllWordConfidences(api);
123             int length = strlen(result);
124 
125             for (; length > 0 && isspace(result[length - 1]); length--);
126             vsapi->propSetData(m, "OCRString", result, length, paReplace);
127 
128             for (i = 0; confs[i] != -1; i++) {
129                 vsapi->propSetInt(m, "OCRConfidence", confs[i], paAppend);
130             }
131 
132             free(confs);
133             free(result);
134         }
135 
136         TessBaseAPIEnd(api);
137         TessBaseAPIDelete(api);
138         vsapi->freeFrame(src);
139 
140         return dst;
141     }
142 
143     return 0;
144 }
145 
/*
 * Tesseract requires zero-terminated strings for API functions like
 * SetVariable. This is to make extra sure that we have them.
 *
 * Copies the first `size` bytes of `data` into a freshly malloc'ed buffer
 * and appends a terminating '\0'. The caller owns the result and releases
 * it with free().
 *
 * Returns NULL when `data` is NULL, when `size` is not positive, or when the
 * allocation fails.
 */
static char *szterm(const char *data, int size) {
    char *copy;
    size_t len;

    if (!data || size <= 0)
        return NULL;

    /* Do the "+ 1" in size_t so that size == INT_MAX cannot overflow int. */
    len = (size_t)size;
    copy = malloc(len + 1);

    if (!copy)
        return NULL;

    memcpy(copy, data, len);
    copy[len] = '\0';

    return copy;
}
163 
OCRCreate(const VSMap * in,VSMap * out,void * userData,VSCore * core,const VSAPI * vsapi)164 static void VS_CC OCRCreate(const VSMap *in, VSMap *out, void *userData,
165                                VSCore *core, const VSAPI *vsapi)
166 {
167     OCRData d, *data;
168     const char *msg;
169     int err, nopts;
170 
171     int size;
172     const char *opt;
173 
174     d.node = vsapi->propGetNode(in, "clip", 0, 0);
175     d.vi = *vsapi->getVideoInfo(d.node);
176     d.options = NULL;
177     d.datapath = NULL;
178     d.language = NULL;
179 
180     if (!d.vi.format) {
181         msg = "Only constant format input supported";
182         goto error;
183     }
184 
185     if (d.vi.format->sampleType != stInteger ||
186         d.vi.format->bytesPerSample != 1 ||
187         d.vi.format->colorFamily != cmGray) {
188 
189         msg = "Only grayscale 8-bit int formats supported";
190         goto error;
191     }
192 
193     if ((nopts = vsapi->propNumElements(in, "options")) > 0) {
194         if (nopts % 2) {
195             msg = "Options must be key,value pairs";
196             goto error;
197         } else {
198             int i;
199 
200             d.options = vsapi->createMap();
201 
202             for (i = 0; i < nopts; i++) {
203                 char *tmp;
204 
205                 opt = vsapi->propGetData(in, "options", i, &err);
206                 size = vsapi->propGetDataSize(in, "options", i, &err);
207 
208                 if (err) {
209                     msg = "Failed to read an option";
210                     goto error;
211                 }
212 
213                 if (size == 0) {
214                     msg = "Options and their values must have non-zero length";
215                     goto error;
216                 }
217 
218                 tmp = szterm(opt, size);
219 
220                 if (!tmp) {
221                     msg = "Failed to allocate memory for option";
222                     goto error;
223                 }
224 
225                 vsapi->propSetData(d.options, "options",
226                                    tmp, size + 1, paAppend);
227 
228                 free(tmp);
229             }
230         }
231     }
232 
233     opt = vsapi->propGetData(in, "datapath", 0, &err);
234     size = vsapi->propGetDataSize(in, "datapath", 0, &err);
235 
236     if (!err) {
237         d.datapath = szterm(opt, size);
238     }
239 
240     opt = vsapi->propGetData(in, "language", 0, &err);
241     size = vsapi->propGetDataSize(in, "language", 0, &err);
242 
243     if (!err) {
244         d.language = szterm(opt, size);
245 #ifdef _WIN32
246     } else {
247         VSPlugin *ocr_plugin = vsapi->getPluginById("biz.srsfckn.ocr", core);
248         const char *plugin_path = vsapi->getPluginPath(ocr_plugin);
249         char *last_slash = strrchr(plugin_path, '/');
250         d.datapath = szterm(plugin_path, last_slash - plugin_path + 1);
251 #endif
252     }
253 
254     data = malloc(sizeof(d));
255     *data = d;
256 
257     vsapi->createFilter(in, out, "OCR", OCRInit,
258                         OCRGetFrame, OCRFree, fmParallel, 0, data, core);
259 
260     return;
261 
262 error:
263     vsapi->freeNode(d.node);
264     vsapi->freeMap(d.options);
265     free(d.datapath);
266     free(d.language);
267     vsapi->setError(out, msg);
268 }
269 
270 VS_EXTERNAL_API(void) VapourSynthPluginInit(VSConfigPlugin configFunc,
271                                             VSRegisterFunction registerFunc,
272                                             VSPlugin *plugin);
273 
VapourSynthPluginInit(VSConfigPlugin configFunc,VSRegisterFunction registerFunc,VSPlugin * plugin)274 VS_EXTERNAL_API(void) VapourSynthPluginInit(VSConfigPlugin configFunc,
275                                             VSRegisterFunction registerFunc,
276                                             VSPlugin *plugin)
277 {
278     configFunc("biz.srsfckn.ocr", "ocr", "Tesseract OCR Filter",
279                VAPOURSYNTH_API_VERSION, 1, plugin);
280 
281     registerFunc("Recognize",
282                  "clip:clip;datapath:data:opt;language:data:opt;options:data[]:opt",
283                  OCRCreate, 0, plugin);
284 }
285