1 /*****************************************************************
2  * gmerlin - a general purpose multimedia framework and applications
3  *
4  * Copyright (c) 2001 - 2011 Members of the Gmerlin project
5  * gmerlin-general@lists.sourceforge.net
6  * http://gmerlin.sourceforge.net
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation, either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
20  * *****************************************************************/
21 
22 #include <config.h>
23 #include <string.h>
24 #include <ctype.h>
25 
26 #include <gmerlin/ocr.h>
27 #include <gmerlin/subprocess.h>
28 #include <gmerlin/utils.h>
29 #include <gmerlin/bggavl.h>
30 
31 #include <gmerlin/translation.h>
32 #include <gmerlin/log.h>
33 #define LOG_DOMAIN "ocr"
34 
35 typedef struct
36   {
37   const char * name;
38   int (*supported)(bg_plugin_registry_t * plugin_reg);
39   int (*init)(bg_ocr_t * ocr, gavl_video_format_t*);
40   int (*run)(bg_ocr_t * ocr, const gavl_video_format_t*,gavl_video_frame_t*,char ** ret);
41   } ocr_funcs_t;
42 
43 static int supported_tesseract(bg_plugin_registry_t * plugin_reg);
44 static int init_tesseract(bg_ocr_t *, gavl_video_format_t *);
45 static int run_tesseract(bg_ocr_t *, const gavl_video_format_t *, gavl_video_frame_t*,char ** ret);
46 
47 static ocr_funcs_t ocr_funcs[] =
48   {
49     {
50       .name      = "tesseract",
51       .supported = supported_tesseract,
52       .init      = init_tesseract,
53       .run       = run_tesseract,
54     },
55     { /* */ }
56   };
57 
58 struct bg_ocr_s
59   {
60   gavl_video_converter_t * cnv;
61   gavl_video_options_t * opt;
62 
63   bg_plugin_registry_t * plugin_reg;
64 
65   int do_convert;
66   char lang[4];
67 
68   gavl_video_format_t in_format;
69   gavl_video_format_t out_format;
70   ocr_funcs_t * funcs;
71 
72   bg_plugin_handle_t * iw_handle;
73   bg_image_writer_plugin_t * iw_plugin;
74 
75   gavl_video_frame_t * out_frame;
76 
77   bg_iw_callbacks_t cb;
78   char * image_file;
79 
80   char * tmpdir;
81 
82   };
83 
create_output_file(void * priv,const char * name)84 static int create_output_file(void * priv, const char * name)
85   {
86   bg_ocr_t * ocr = priv;
87   ocr->image_file = bg_strdup(ocr->image_file, name);
88   bg_log(BG_LOG_DEBUG, LOG_DOMAIN, "Writing image file %s", name);
89   return 1;
90   }
91 
/*
 *  Load the image writer plugin with the given name and install the
 *  callbacks of the OCR context in it. A previously loaded writer is
 *  released first.
 *
 *  Returns 1 on success, 0 if the plugin is unknown or cannot be loaded.
 *  On failure both iw_handle and iw_plugin are NULL.
 */
static int load_image_writer(bg_ocr_t * ocr, const char * plugin)
  {
  const bg_plugin_info_t * info;

  if(ocr->iw_handle)
    {
    bg_plugin_unref(ocr->iw_handle);
    ocr->iw_handle = NULL;
    }
  /* Never keep a pointer into a plugin we no longer hold a reference to */
  ocr->iw_plugin = NULL;

  info = bg_plugin_find_by_name(ocr->plugin_reg, plugin);
  if(!info)
    {
    bg_log(BG_LOG_ERROR, LOG_DOMAIN,
           "Image writer plugin %s not found", plugin);
    return 0;
    }

  ocr->iw_handle = bg_plugin_load(ocr->plugin_reg, info);
  if(!ocr->iw_handle)
    return 0;
  ocr->iw_plugin = (bg_image_writer_plugin_t*)ocr->iw_handle->plugin;

  /* set_callbacks is optional for image writers */
  if(ocr->iw_plugin->set_callbacks)
    ocr->iw_plugin->set_callbacks(ocr->iw_handle->priv, &ocr->cb);
  return 1;
  }
112 
bg_ocr_create(bg_plugin_registry_t * plugin_reg)113 bg_ocr_t * bg_ocr_create(bg_plugin_registry_t * plugin_reg)
114   {
115   int i;
116   bg_ocr_t * ret;
117   ocr_funcs_t * funcs = NULL;
118 
119   i = 0;
120   while(ocr_funcs[i].name)
121     {
122     if(ocr_funcs[i].supported(plugin_reg))
123       funcs = &ocr_funcs[i];
124     i++;
125     }
126 
127   if(!funcs)
128     {
129     bg_log(BG_LOG_ERROR, LOG_DOMAIN, "No engine found");
130     return NULL;
131     }
132 
133   ret = calloc(1, sizeof(*ret));
134 
135   ret->cb.data = ret;
136   ret->cb.create_output_file = create_output_file;
137 
138   ret->cnv = gavl_video_converter_create();
139   ret->opt = gavl_video_converter_get_options(ret->cnv);
140   gavl_video_options_set_alpha_mode(ret->opt, GAVL_ALPHA_BLEND_COLOR);
141 
142   ret->plugin_reg = plugin_reg;
143   ret->funcs = funcs;
144 
145   return ret;
146   }
147 
148 const bg_parameter_info_t parameters[] =
149   {
150     {                                    \
151       .name =        "background_color",      \
152       .long_name =   TRS("Background color"), \
153       .type =      BG_PARAMETER_COLOR_RGB, \
154       .val_default = { .val_color = { 0.0, 0.0, 0.0 } }, \
155       .help_string = TRS("Background color to use, when converting formats with transparency to grayscale"), \
156     },
157     {                                    \
158       .name =        "tmpdir",      \
159       .long_name =   TRS("Temporary directory"), \
160       .type =      BG_PARAMETER_DIRECTORY, \
161       .val_default = { .val_str = "/tmp" }, \
162       .help_string = TRS("Temporary directory for image files"), \
163     },
164     { /* End */ }
165 
166   };
167 
bg_ocr_get_parameters()168 const bg_parameter_info_t * bg_ocr_get_parameters()
169   {
170   return parameters;
171   }
172 
bg_ocr_set_parameter(void * data,const char * name,const bg_parameter_value_t * val)173 int bg_ocr_set_parameter(void * data, const char * name,
174                           const bg_parameter_value_t * val)
175   {
176   bg_ocr_t * ocr = data;
177 
178   if(!name)
179     return 1;
180   else if(!strcmp(name, "background_color"))
181     {
182     gavl_video_options_set_background_color(ocr->opt, val->val_color);
183     return 1;
184     }
185   else if(!strcmp(name, "tmpdir"))
186     {
187     ocr->tmpdir = bg_strdup(ocr->tmpdir, val->val_str);
188     return 1;
189     }
190 
191   return 0;
192   }
193 
194 
/*
 *  Prepare an OCR context for frames of the given format.
 *
 *  ocr       Context from bg_ocr_create()
 *  format    Format of the frames which will be passed to bg_ocr_run()
 *  language  Language code (only the first 3 characters are used),
 *            NULL or "" if unknown
 *
 *  May be called repeatedly; state from an earlier call (output frame,
 *  language) is discarded.
 *
 *  Returns 1 on success, 0 if the engine or the converter could not be
 *  initialized.
 */
int bg_ocr_init(bg_ocr_t * ocr,
                const gavl_video_format_t * format,
                const char * language)
  {
  if(ocr->out_frame)
    {
    gavl_video_frame_destroy(ocr->out_frame);
    ocr->out_frame = NULL;
    }

  gavl_video_format_copy(&ocr->in_format, format);
  gavl_video_format_copy(&ocr->out_format, format);

  /* Forget the language of a previous initialization. Clearing the whole
     array also guarantees termination after the bounded copy below. */
  memset(ocr->lang, 0, sizeof(ocr->lang));
  if(language && (language[0] != '\0'))
    strncpy(ocr->lang, language, sizeof(ocr->lang) - 1);

  /* Let the engine choose the pixelformat it wants to get */
  if(!ocr->funcs->init(ocr, &ocr->out_format))
    return 0;

  /* Initialize converter */
  ocr->do_convert = gavl_video_converter_init(ocr->cnv,
                                              &ocr->in_format,
                                              &ocr->out_format);

  /* NOTE(review): a negative result is taken to be gavl's error
     indication - confirm against the gavl documentation */
  if(ocr->do_convert < 0)
    {
    ocr->do_convert = 0;
    bg_log(BG_LOG_ERROR, LOG_DOMAIN, "Initializing video converter failed");
    return 0;
    }

  if(ocr->do_convert)
    ocr->out_frame = gavl_video_frame_create(&ocr->out_format);

  return 1;
  }
226 
227 
/*
 *  Run OCR on one frame.
 *
 *  ocr     Context, initialized with bg_ocr_init()
 *  format  Format of frame
 *  frame   Image to recognize
 *  ret     Receives a newly allocated string owned by the caller
 *
 *  If the engine succeeds with nonempty text, *ret is that text and the
 *  temporary image file is removed.
 *
 *  If the engine fails or the text is empty, the image file is kept and
 *  *ret becomes the name of that file (NULL if no image was written).
 *
 *  NOTE(review): on engine failure the incoming value of *ret is passed
 *  to free(), and the engine may fail before it stores anything there.
 *  Callers should therefore initialize *ret (e.g. to NULL) before the
 *  call - confirm against the callers.
 *
 *  Returns the result of the engine (nonzero on success). An empty
 *  recognition result does not change the return value.
 */
int bg_ocr_run(bg_ocr_t * ocr,
               const gavl_video_format_t * format,
               gavl_video_frame_t * frame,
               char ** ret)
  {
  int result;
  gavl_video_format_t tmp_format;

  if(ocr->do_convert)
    {
    /* Target format: the format of this frame with the pixelformat
       the engine asked for */
    gavl_video_format_copy(&tmp_format, format);
    tmp_format.pixelformat = ocr->out_format.pixelformat;

    gavl_video_converter_init(ocr->cnv,
                              &ocr->in_format,
                              &tmp_format);
    gavl_video_convert(ocr->cnv, frame, ocr->out_frame);

    result = ocr->funcs->run(ocr, &tmp_format, ocr->out_frame, ret);
    }
  else
    result = ocr->funcs->run(ocr, format, frame, ret);

  if(!result || (**ret == '\0'))
    {
    /* No usable text: hand the image file over to the caller instead */
    free(*ret);

    /* image_file is NULL if the engine failed before the image was
       written; it must not be passed to a %s conversion then */
    if(ocr->image_file)
      bg_log(BG_LOG_WARNING, LOG_DOMAIN,
             "OCR failed, keeping %s", ocr->image_file);
    else
      bg_log(BG_LOG_WARNING, LOG_DOMAIN, "OCR failed");

    *ret = ocr->image_file;
    ocr->image_file = NULL;
    }
  else if(ocr->image_file)
    {
    /* Text was recognized, the temporary image is no longer needed */
    bg_log(BG_LOG_DEBUG, LOG_DOMAIN, "Removing %s", ocr->image_file);
    remove(ocr->image_file);
    free(ocr->image_file);
    ocr->image_file = NULL;
    }
  return result;
  }
273 
/*
 *  Free an OCR context and everything it owns (converter, output frame,
 *  image writer reference, strings).
 *
 *  Passing NULL is allowed and does nothing, so the result of a failed
 *  bg_ocr_create() can be passed here safely.
 *
 *  A pending image file is not deleted from disk, only its name is freed.
 */
void bg_ocr_destroy(bg_ocr_t * ocr)
  {
  if(!ocr)
    return;

  if(ocr->cnv)
    gavl_video_converter_destroy(ocr->cnv);
  if(ocr->out_frame)
    gavl_video_frame_destroy(ocr->out_frame);
  if(ocr->iw_handle)
    bg_plugin_unref(ocr->iw_handle);

  /* free(NULL) is a no-op, no checks needed */
  free(ocr->image_file);
  free(ocr->tmpdir);

  free(ocr);
  }
290 
291 /* Tesseract */
292 
/*
 *  Tesseract is usable if bg_search_file_exec() finds the "tesseract"
 *  executable and the registry knows the "iw_tiff" plugin.
 *  The plugin lookup is skipped if the executable is missing.
 *
 *  Returns 1 if both are available, 0 otherwise.
 */
static int supported_tesseract(bg_plugin_registry_t * plugin_reg)
  {
  return bg_search_file_exec("tesseract", NULL) &&
         bg_plugin_find_by_name(plugin_reg, "iw_tiff");
  }
301 
/*
 *  Engine setup for tesseract: request 8 bit grayscale input and make
 *  sure the "iw_tiff" image writer is loaded. An already loaded writer
 *  is reused.
 *
 *  Returns 1 on success, 0 if the image writer could not be loaded.
 */
static int init_tesseract(bg_ocr_t * ocr, gavl_video_format_t * format)
  {
  format->pixelformat = GAVL_GRAY_8;

  if(ocr->iw_handle)
    return 1;

  return load_image_writer(ocr, "iw_tiff") ? 1 : 0;
  }
314 
/*
 *  Recognize one frame with the tesseract command line program.
 *
 *  The frame is written as an image into the temporary directory using
 *  the image writer plugin, then "tesseract <image> <base> [-l <lang>]"
 *  is executed and <base>.txt is read back. Trailing whitespace is
 *  stripped from the text.
 *
 *  ocr     Context (image writer must be loaded, see init_tesseract())
 *  format  Format of frame
 *  frame   Image to recognize
 *  ret     On success receives the recognized text (allocated; may be
 *          an empty string). Not necessarily written on failure.
 *
 *  The image file itself is NOT removed here; its name stays in
 *  ocr->image_file. The text file is always removed.
 *
 *  NOTE(review): the file names are inserted into the command line
 *  without quoting, so a temporary directory containing whitespace or
 *  shell metacharacters will not work as intended.
 *
 *  Returns 1 on success, 0 on failure.
 */
static int run_tesseract(bg_ocr_t * ocr, const gavl_video_format_t * format,
                         gavl_video_frame_t * frame, char ** ret)
  {
  char * pos;
  char * commandline = NULL;
  gavl_video_format_t tmp_format;
  char * tiff_file = NULL;
  char * text_file = NULL;
  char * base = NULL;
  char * template;
  size_t len;
  int result = 0;

  /* Fall back to the default of the "tmpdir" parameter if the
     parameter was never set; NULL must not be passed to %s */
  template = bg_sprintf("%s/gmerlin_ocr_%%05d.tif",
                        ocr->tmpdir ? ocr->tmpdir : "/tmp");

  /* Create name for tiff file */
  tiff_file = bg_create_unique_filename(template);

  free(template);

  if(!tiff_file)
    return 0;

  /* Strip the extension: the base name is passed to the image writer
     and to tesseract */
  base = bg_strdup(NULL, tiff_file);
  pos = base ? strrchr(base, '.') : NULL;
  if(!pos)
    goto fail;

  *pos = '\0';

  /* Create name for text file */
  text_file = bg_sprintf("%s.txt", base);

  /* Save image */

  gavl_video_format_copy(&tmp_format, format);

  if(!ocr->iw_plugin->write_header(ocr->iw_handle->priv, base, &tmp_format, NULL))
    goto fail;
  if(!ocr->iw_plugin->write_image(ocr->iw_handle->priv, frame))
    goto fail;

  /* The name of the written file is reported through the
     create_output_file callback. Without it there is nothing
     to pass to tesseract. */
  if(!ocr->image_file)
    goto fail;

  commandline = bg_sprintf("tesseract %s %s", ocr->image_file, base);

  if(ocr->lang[0] != '\0')
    {
    commandline = bg_strcat(commandline, " -l ");
    commandline = bg_strcat(commandline, bg_iso639_b_to_t(ocr->lang));
    }

  /* Nonzero result means the command failed */
  if(bg_system(commandline))
    goto fail;

  *ret = bg_read_file(text_file, NULL);

  if(!(*ret))
    goto fail;

  /* Remove trailing whitespace. The length is checked before the
     character is read, so empty or all-whitespace output never causes
     an access in front of the buffer. */
  len = strlen(*ret);
  while((len > 0) && isspace((unsigned char)(*ret)[len - 1]))
    {
    len--;
    (*ret)[len] = '\0';
    }

  result = 1;

  fail:

  free(tiff_file);
  free(base);

  if(text_file)
    {
    remove(text_file);
    free(text_file);
    }
  free(commandline);
  return result;
  }
398 
399 
400