1 /*****************************************************************
2 * gmerlin - a general purpose multimedia framework and applications
3 *
4 * Copyright (c) 2001 - 2011 Members of the Gmerlin project
5 * gmerlin-general@lists.sourceforge.net
6 * http://gmerlin.sourceforge.net
7 *
8 * This program is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 * *****************************************************************/
21
22 #include <config.h>
23 #include <string.h>
24 #include <ctype.h>
25
26 #include <gmerlin/ocr.h>
27 #include <gmerlin/subprocess.h>
28 #include <gmerlin/utils.h>
29 #include <gmerlin/bggavl.h>
30
31 #include <gmerlin/translation.h>
32 #include <gmerlin/log.h>
33 #define LOG_DOMAIN "ocr"
34
35 typedef struct
36 {
37 const char * name;
38 int (*supported)(bg_plugin_registry_t * plugin_reg);
39 int (*init)(bg_ocr_t * ocr, gavl_video_format_t*);
40 int (*run)(bg_ocr_t * ocr, const gavl_video_format_t*,gavl_video_frame_t*,char ** ret);
41 } ocr_funcs_t;
42
43 static int supported_tesseract(bg_plugin_registry_t * plugin_reg);
44 static int init_tesseract(bg_ocr_t *, gavl_video_format_t *);
45 static int run_tesseract(bg_ocr_t *, const gavl_video_format_t *, gavl_video_frame_t*,char ** ret);
46
47 static ocr_funcs_t ocr_funcs[] =
48 {
49 {
50 .name = "tesseract",
51 .supported = supported_tesseract,
52 .init = init_tesseract,
53 .run = run_tesseract,
54 },
55 { /* */ }
56 };
57
58 struct bg_ocr_s
59 {
60 gavl_video_converter_t * cnv;
61 gavl_video_options_t * opt;
62
63 bg_plugin_registry_t * plugin_reg;
64
65 int do_convert;
66 char lang[4];
67
68 gavl_video_format_t in_format;
69 gavl_video_format_t out_format;
70 ocr_funcs_t * funcs;
71
72 bg_plugin_handle_t * iw_handle;
73 bg_image_writer_plugin_t * iw_plugin;
74
75 gavl_video_frame_t * out_frame;
76
77 bg_iw_callbacks_t cb;
78 char * image_file;
79
80 char * tmpdir;
81
82 };
83
create_output_file(void * priv,const char * name)84 static int create_output_file(void * priv, const char * name)
85 {
86 bg_ocr_t * ocr = priv;
87 ocr->image_file = bg_strdup(ocr->image_file, name);
88 bg_log(BG_LOG_DEBUG, LOG_DOMAIN, "Writing image file %s", name);
89 return 1;
90 }
91
/*
 *  Load the image writer plugin with the given name and install our
 *  callbacks so we learn the name of every file it writes.
 *  A previously loaded writer is released first.
 *
 *  Returns 1 on success, 0 if the plugin is unknown or cannot be loaded.
 *  On failure both iw_handle and iw_plugin are NULL.
 */
static int load_image_writer(bg_ocr_t * ocr, const char * plugin)
  {
  const bg_plugin_info_t * info;

  if(ocr->iw_handle)
    {
    bg_plugin_unref(ocr->iw_handle);
    ocr->iw_handle = NULL;
    }
  /* Never keep a plugin pointer that belongs to a released handle */
  ocr->iw_plugin = NULL;

  /* Don't hand a NULL info to the loader if the plugin is not registered */
  info = bg_plugin_find_by_name(ocr->plugin_reg, plugin);
  if(!info)
    {
    bg_log(BG_LOG_ERROR, LOG_DOMAIN,
           "Image writer plugin %s not found", plugin);
    return 0;
    }

  ocr->iw_handle = bg_plugin_load(ocr->plugin_reg, info);
  if(!ocr->iw_handle)
    return 0;

  ocr->iw_plugin = (bg_image_writer_plugin_t*)ocr->iw_handle->plugin;

  /* set_callbacks is optional for the plugin */
  if(ocr->iw_plugin->set_callbacks)
    ocr->iw_plugin->set_callbacks(ocr->iw_handle->priv, &ocr->cb);
  return 1;
  }
112
bg_ocr_create(bg_plugin_registry_t * plugin_reg)113 bg_ocr_t * bg_ocr_create(bg_plugin_registry_t * plugin_reg)
114 {
115 int i;
116 bg_ocr_t * ret;
117 ocr_funcs_t * funcs = NULL;
118
119 i = 0;
120 while(ocr_funcs[i].name)
121 {
122 if(ocr_funcs[i].supported(plugin_reg))
123 funcs = &ocr_funcs[i];
124 i++;
125 }
126
127 if(!funcs)
128 {
129 bg_log(BG_LOG_ERROR, LOG_DOMAIN, "No engine found");
130 return NULL;
131 }
132
133 ret = calloc(1, sizeof(*ret));
134
135 ret->cb.data = ret;
136 ret->cb.create_output_file = create_output_file;
137
138 ret->cnv = gavl_video_converter_create();
139 ret->opt = gavl_video_converter_get_options(ret->cnv);
140 gavl_video_options_set_alpha_mode(ret->opt, GAVL_ALPHA_BLEND_COLOR);
141
142 ret->plugin_reg = plugin_reg;
143 ret->funcs = funcs;
144
145 return ret;
146 }
147
148 const bg_parameter_info_t parameters[] =
149 {
150 { \
151 .name = "background_color", \
152 .long_name = TRS("Background color"), \
153 .type = BG_PARAMETER_COLOR_RGB, \
154 .val_default = { .val_color = { 0.0, 0.0, 0.0 } }, \
155 .help_string = TRS("Background color to use, when converting formats with transparency to grayscale"), \
156 },
157 { \
158 .name = "tmpdir", \
159 .long_name = TRS("Temporary directory"), \
160 .type = BG_PARAMETER_DIRECTORY, \
161 .val_default = { .val_str = "/tmp" }, \
162 .help_string = TRS("Temporary directory for image files"), \
163 },
164 { /* End */ }
165
166 };
167
bg_ocr_get_parameters()168 const bg_parameter_info_t * bg_ocr_get_parameters()
169 {
170 return parameters;
171 }
172
bg_ocr_set_parameter(void * data,const char * name,const bg_parameter_value_t * val)173 int bg_ocr_set_parameter(void * data, const char * name,
174 const bg_parameter_value_t * val)
175 {
176 bg_ocr_t * ocr = data;
177
178 if(!name)
179 return 1;
180 else if(!strcmp(name, "background_color"))
181 {
182 gavl_video_options_set_background_color(ocr->opt, val->val_color);
183 return 1;
184 }
185 else if(!strcmp(name, "tmpdir"))
186 {
187 ocr->tmpdir = bg_strdup(ocr->tmpdir, val->val_str);
188 return 1;
189 }
190
191 return 0;
192 }
193
194
/*
 *  (Re)initialize an OCR instance for a video format.
 *
 *  ocr       OCR instance
 *  format    Format of the frames which will be passed to bg_ocr_run()
 *  language  Language code (at most 3 characters are used), NULL or
 *            empty for no explicit language
 *
 *  Returns 1 on success, 0 if the backend could not be initialized.
 */
int bg_ocr_init(bg_ocr_t * ocr,
                const gavl_video_format_t * format,
                const char * language)
  {
  /* Drop conversion state from a previous initialization, so a failing
     re-init never leaves do_convert set while out_frame is NULL */
  if(ocr->out_frame)
    {
    gavl_video_frame_destroy(ocr->out_frame);
    ocr->out_frame = NULL;
    }
  ocr->do_convert = 0;

  gavl_video_format_copy(&ocr->in_format, format);
  gavl_video_format_copy(&ocr->out_format, format);

  /* Clear the language first: this forgets a language from an earlier
     call and guarantees termination, which strncpy() alone does not */
  memset(ocr->lang, 0, sizeof(ocr->lang));
  if(language && (language[0] != '\0'))
    strncpy(ocr->lang, language, sizeof(ocr->lang) - 1);

  /* Let the backend choose the pixelformat it needs */
  if(!ocr->funcs->init(ocr, &ocr->out_format))
    return 0;

  /* Initialize converter */
  ocr->do_convert = gavl_video_converter_init(ocr->cnv,
                                              &ocr->in_format,
                                              &ocr->out_format);

  if(ocr->do_convert)
    ocr->out_frame = gavl_video_frame_create(&ocr->out_format);

  return 1;
  }
226
227
/*
 *  Run OCR on one video frame.
 *
 *  ocr     OCR instance, initialized with bg_ocr_init()
 *  format  Format of the frame
 *  frame   Frame to recognize
 *  ret     Receives a newly allocated string owned by the caller.
 *          On success this is the recognized text. If the backend fails
 *          or yields an empty text, it is the name of the image file,
 *          which is kept on disk (NULL if no image file was written).
 *
 *  Returns the result of the backend: nonzero on success, 0 on failure.
 */
int bg_ocr_run(bg_ocr_t * ocr,
               const gavl_video_format_t * format,
               gavl_video_frame_t * frame,
               char ** ret)
  {
  int result;
  gavl_video_format_t tmp_format;

  /* The backend does not store anything in *ret on most failure paths.
     Start from a defined value so the cleanup below never frees garbage. */
  *ret = NULL;

  if(ocr->do_convert)
    {
    /* Convert into the pixelformat requested by the backend */
    gavl_video_format_copy(&tmp_format, format);
    tmp_format.pixelformat = ocr->out_format.pixelformat;

    gavl_video_converter_init(ocr->cnv,
                              &ocr->in_format,
                              &tmp_format);
    gavl_video_convert(ocr->cnv, frame, ocr->out_frame);

    result = ocr->funcs->run(ocr, &tmp_format, ocr->out_frame, ret);
    }
  else
    result = ocr->funcs->run(ocr, format, frame, ret);

  if(!result || !(*ret) || (**ret == '\0'))
    {
    /* No usable text: hand the image file over to the caller instead */
    free(*ret);

    if(ocr->image_file)
      bg_log(BG_LOG_WARNING, LOG_DOMAIN,
             "OCR failed, keeping %s", ocr->image_file);
    else
      bg_log(BG_LOG_WARNING, LOG_DOMAIN,
             "OCR failed, no image file was written");

    *ret = ocr->image_file;
    ocr->image_file = NULL;
    }
  else if(ocr->image_file)
    {
    /* Text was recognized, the temporary image is no longer needed */
    bg_log(BG_LOG_DEBUG, LOG_DOMAIN, "Removing %s", ocr->image_file);
    remove(ocr->image_file);
    free(ocr->image_file);
    ocr->image_file = NULL;
    }
  return result;
  }
273
/*
 *  Destroy an OCR instance and release everything it owns.
 *  Passing NULL is allowed and does nothing, so the result of a
 *  failed bg_ocr_create() can be passed unconditionally.
 */
void bg_ocr_destroy(bg_ocr_t * ocr)
  {
  if(!ocr)
    return;

  if(ocr->cnv)
    gavl_video_converter_destroy(ocr->cnv);
  if(ocr->out_frame)
    gavl_video_frame_destroy(ocr->out_frame);
  if(ocr->iw_handle)
    bg_plugin_unref(ocr->iw_handle);

  /* free(NULL) is a no-op, no guards needed */
  free(ocr->image_file);
  free(ocr->tmpdir);

  free(ocr);
  }
290
291 /* Tesseract */
292
/*
 *  The tesseract backend needs the "tesseract" executable and the
 *  "iw_tiff" image writer plugin. Returns 1 if both are found, 0 otherwise.
 */
static int supported_tesseract(bg_plugin_registry_t * plugin_reg)
  {
  int have_program = bg_search_file_exec("tesseract", NULL) ? 1 : 0;

  /* The plugin lookup is only done if the program was found */
  return have_program &&
    (bg_plugin_find_by_name(plugin_reg, "iw_tiff") ? 1 : 0);
  }
301
/*
 *  Backend init: request 8 bit grayscale frames and make sure the
 *  TIFF image writer is loaded. Returns 1 on success, 0 if the
 *  writer could not be loaded.
 */
static int init_tesseract(bg_ocr_t * ocr, gavl_video_format_t * format)
  {
  format->pixelformat = GAVL_GRAY_8;

  /* An already loaded writer is reused */
  if(ocr->iw_handle)
    return 1;

  return load_image_writer(ocr, "iw_tiff") ? 1 : 0;
  }
314
/*
 *  Backend run: save the frame as an image in the temporary directory,
 *  call the tesseract program on it and read back the text file it wrote.
 *
 *  ocr     OCR instance with a loaded image writer
 *  format  Format of the frame
 *  frame   Frame to recognize
 *  ret     On success receives the recognized text with trailing
 *          whitespace removed (allocated, owned by the caller)
 *
 *  Returns 1 on success, 0 on failure. The text file is always removed.
 *  The image file (ocr->image_file) is left in place; bg_ocr_run()
 *  decides whether to keep or delete it.
 */
static int run_tesseract(bg_ocr_t * ocr, const gavl_video_format_t * format,
                         gavl_video_frame_t * frame, char ** ret)
  {
  char * pos;
  char * commandline = NULL;
  gavl_video_format_t tmp_format;
  char * tiff_file = NULL;
  char * text_file = NULL;
  char * base = NULL;
  char * template;
  size_t len;
  int result = 0;

  /* Fall back to the default of the "tmpdir" parameter if it was never set */
  template = bg_sprintf("%s/gmerlin_ocr_%%05d.tif",
                        ocr->tmpdir ? ocr->tmpdir : "/tmp");

  /* Create name for tiff file */
  tiff_file = bg_create_unique_filename(template);

  free(template);

  if(!tiff_file)
    return 0;

  /* Strip the extension: the base name is passed both to the image
     writer and to tesseract */
  base = bg_strdup(NULL, tiff_file);
  pos = base ? strrchr(base, '.') : NULL;
  if(!pos)
    goto fail; /* Release tiff_file and base instead of leaking them */

  *pos = '\0';

  /* Create name for text file */
  text_file = bg_sprintf("%s.txt", base);

  /* Save image (the writer gets a private copy of the format) */

  gavl_video_format_copy(&tmp_format, format);

  if(!ocr->iw_plugin->write_header(ocr->iw_handle->priv, base, &tmp_format, NULL))
    goto fail;
  if(!ocr->iw_plugin->write_image(ocr->iw_handle->priv, frame))
    goto fail;

  /* The filename is reported through the create_output_file callback;
     without it there is nothing to pass to tesseract */
  if(!ocr->image_file)
    {
    bg_log(BG_LOG_ERROR, LOG_DOMAIN,
           "Image writer did not report an output file");
    goto fail;
    }

  /* NOTE(review): the file names are inserted unquoted. Assuming
     bg_system() hands the string to a shell, a tmpdir containing spaces
     or shell metacharacters breaks or alters the command -- confirm and
     quote if necessary. */
  commandline = bg_sprintf("tesseract %s %s", ocr->image_file, base);

  if(ocr->lang[0] != '\0')
    {
    commandline = bg_strcat(commandline, " -l ");
    commandline = bg_strcat(commandline, bg_iso639_b_to_t(ocr->lang));
    }

  /* Nonzero means the command failed */
  if(bg_system(commandline))
    goto fail;

  *ret = bg_read_file(text_file, NULL);

  if(!(*ret))
    goto fail;

  /* Remove trailing whitespace. Working with the length keeps us inside
     the buffer for empty and all-whitespace texts; the cast is required
     because isspace() is undefined for negative char values. */
  len = strlen(*ret);
  while((len > 0) && isspace((unsigned char)(*ret)[len - 1]))
    {
    len--;
    (*ret)[len] = '\0';
    }

  result = 1;

  fail:

  free(tiff_file);
  free(base);

  if(text_file)
    {
    remove(text_file);
    free(text_file);
    }
  free(commandline);
  return result;
  }
398
399
400