1 /* OggEnc
2  **
3  ** This program is distributed under the GNU General Public License, version 2.
4  ** A copy of this license is included with this source.
5  **
6  ** This particular file may also be distributed under (at your option) any
7  ** later version of the GNU General Public License.
8  **
9  ** Copyright 2008, ogg.k.ogg.k <ogg.k.ogg.k@googlemail.com>
10  **
11  ** Portions from ffmpeg2theora, (c) j <j@v2v.cc>
12  **/
13 
14 #ifdef HAVE_CONFIG_H
15 #include <config.h>
16 #endif
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <errno.h>
22 
23 #ifdef HAVE_KATE
24 #include <kate/oggkate.h>
25 #endif
26 
27 #include "lyrics.h"
28 #include "utf8.h"
29 #include "i18n.h"
30 
/* Lyrics file formats distinguished by probe_lyrics_format(). */
typedef enum {
  lf_unknown = 0, /* format could not be determined */
  lf_srt = 1,     /* SRT subtitles */
  lf_lrc = 2      /* LRC lyrics */
} lyrics_format;
36 
37 #ifdef HAVE_KATE
38 
/*
 * Read one line from f into s with fgets() (at most sz-1 characters).
 * If the data read contains a carriage return, the line is cut at that
 * point and terminated with a single '\n', so that DOS style "\r\n" line
 * endings look like plain "\n" to the callers.
 * Returns s, or NULL when fgets() returned NULL (s is then left untouched).
 */
static char *fgets2(char *s,size_t sz,FILE *f)
{
    char *cr;

    if (!fgets(s, sz, f))
      return NULL;

    /* fixup DOS newline character */
    cr = strchr(s, '\r');
    if (cr != NULL) {
      cr[0] = '\n';
      cr[1] = '\0';
    }
    return s;
}
52 
/*
 * Convert an hours / minutes / seconds / milliseconds timestamp to seconds.
 *
 * The sum is carried out in double precision: the fields come straight from
 * sscanf("%d") on user supplied files, and h*3600 evaluated in int
 * arithmetic overflows (undefined behaviour) for large hour values.
 * For values that did not overflow the result is unchanged.
 */
static double hmsms2s(int h,int m,int s,int ms)
{
    return h*3600.0+m*60.0+s+ms/1000.0;
}
57 
/* Release a karaoke motion together with its curves, their points and its durations. */
static void discard_motion(kate_motion *km)
{
  size_t c;

  if (!km) return;
  if (km->curves) {
    for (c=0; c<km->ncurves; ++c) {
      if (km->curves[c]) {
        free(km->curves[c]->pts);
        free(km->curves[c]);
      }
    }
  }
  free(km->curves);
  free(km->durations);
  free(km);
}

/*
 * Append one lyrics item to lyrics.
 *
 * text is converted with utf8_encode(); the converted copy is stored in the
 * new item with trailing newlines removed, along with its length, the
 * [t0,t1] time span and the optional karaoke motion km (may be NULL).
 *
 * km is taken over by this function: it is either stored in the new item
 * (and later released by free_lyrics()) or released here when no item is
 * stored, so the caller must not use it afterwards.
 *
 * Returns 0 on success, and also when the text fails UTF-8 validation (a
 * warning is printed and the item is skipped). Returns a negative value if
 * the conversion fails or memory runs out; lyrics is then left unchanged
 * and still safe to pass to free_lyrics().
 */
static int add_lyrics(oe_lyrics *lyrics, char *text, kate_motion *km, double t0,double t1)
{
  size_t len;
  int ret;
  char *utf8;
  oe_lyrics_item *items;
  oe_lyrics_item *item;

  ret=utf8_encode(text,&utf8);
  if (ret<0) {
    fprintf(stderr,_("Failed to convert to UTF-8: %s\n"),text);
    discard_motion(km);
    return ret;
  }

  /* grow through a temporary: on failure the existing array must stay
     reachable, as count still describes it */
  items = (oe_lyrics_item*)realloc(lyrics->lyrics, (lyrics->count+1)*sizeof(oe_lyrics_item));
  if (!items) {
    free(utf8);
    discard_motion(km);
    fprintf(stderr, _("Out of memory\n"));
    return -1;
  }
  lyrics->lyrics = items;

  len = strlen(utf8);
  /* the terminating NUL is included in the validated range */
  ret=kate_text_validate(kate_utf8,utf8,len+1);
  if (ret<0) {
    fprintf(stderr,_("WARNING: subtitle %s is not valid UTF-8\n"),utf8);
    free(utf8);
    discard_motion(km);
    return 0;
  }

  /* kill off trailing \n characters */
  while (len>0 && utf8[len-1]=='\n') {
    utf8[--len]=0;
  }

  item = &lyrics->lyrics[lyrics->count];
  item->text = utf8;
  item->len = len;
  item->t0 = t0;
  item->t1 = t1;
  item->km = km;
  lyrics->count++;

  return 0;
}
96 
/*
 * Tell whether s holds nothing but blanks.
 * Returns 1 when s is NULL, empty, or made only of spaces, tabs, carriage
 * returns and newlines; returns 0 as soon as any other byte is met.
 */
static int is_line_empty(const char *s)
{
  const char *p;

  if (s == NULL) return 1;

  /* will work fine with UTF-8 despite the appearance */
  for (p = s; *p != '\0'; ++p) {
    switch (*p) {
      case ' ':
      case '\t':
      case '\r':
      case '\n':
        continue;
      default:
        return 0;
    }
  }
  return 1;
}
106 
load_srt_lyrics(FILE * f)107 static oe_lyrics *load_srt_lyrics(FILE *f)
108 {
109     enum { need_id, need_timing, need_text };
110     int need = need_id;
111     int last_seen_id=0;
112     int ret;
113     int id;
114     static char text[4096];
115     static char str[4096];
116     int h0,m0,s0,ms0,h1,m1,s1,ms1;
117     double t0=0.0;
118     double t1=0.0;
119     oe_lyrics *lyrics;
120     unsigned int line=0;
121 
122     if (!f) return NULL;
123 
124     lyrics=(oe_lyrics*)malloc(sizeof(oe_lyrics));
125     if (!lyrics) return NULL;
126     lyrics->count = 0;
127     lyrics->lyrics = NULL;
128     lyrics->karaoke = 0;
129 
130     fgets2(str,sizeof(str),f);
131     ++line;
132     while (!feof(f)) {
133       switch (need) {
134         case need_id:
135           if (is_line_empty(str)) {
136             /* be nice and ignore extra empty lines between records */
137           }
138           else {
139             ret=sscanf(str,"%d\n",&id);
140             if (ret!=1 || id<0) {
141               fprintf(stderr,_("ERROR - line %u: Syntax error: %s\n"),line,str);
142               free_lyrics(lyrics);
143               return NULL;
144             }
145             if (id!=last_seen_id+1) {
146               fprintf(stderr,_("WARNING - line %u: non consecutive ids: %s - pretending not to have noticed\n"),line,str);
147             }
148             last_seen_id=id;
149             need=need_timing;
150             strcpy(text,"");
151           }
152           break;
153         case need_timing:
154           /* we could use %u, but glibc accepts minus signs for %u for some reason */
155           ret=sscanf(str,"%d:%d:%d%*[.,]%d --> %d:%d:%d%*[.,]%d\n",&h0,&m0,&s0,&ms0,&h1,&m1,&s1,&ms1);
156           if (ret!=8 || (h0|m0|s0|ms0)<0 || (h1|m1|s1|ms1)<0) {
157             fprintf(stderr,_("ERROR - line %u: Syntax error: %s\n"),line,str);
158             free_lyrics(lyrics);
159             return NULL;
160           }
161           else if (t1<t0) {
162             fprintf(stderr,_("ERROR - line %u: end time must not be less than start time: %s\n"),line,str);
163            free_lyrics(lyrics);
164             return NULL;
165           }
166           else {
167             t0=hmsms2s(h0,m0,s0,ms0);
168             t1=hmsms2s(h1,m1,s1,ms1);
169           }
170           need=need_text;
171           break;
172         case need_text:
173           if (str[0]=='\n') {
174             if (add_lyrics(lyrics,text,NULL,t0,t1) < 0) {
175               free_lyrics(lyrics);
176               return NULL;
177             }
178             need=need_id;
179           }
180           else {
181             /* in case of very long lines */
182             size_t len=strlen(text);
183             if (len+strlen(str) >= sizeof(text)) {
184               fprintf(stderr, _("WARNING - line %u: text is too long - truncated\n"),line);
185             }
186             strncpy(text+len,str,sizeof(text)-len);
187             text[sizeof(text)-1]=0;
188           }
189           break;
190       }
191       fgets2(str,sizeof(str),f);
192       ++line;
193     }
194 
195     if (need!=need_id) {
196       /* shouldn't be a problem though, but warn */
197       fprintf(stderr, _("WARNING - line %u: missing data - truncated file?\n"),line);
198     }
199 
200     return lyrics;
201 }
202 
/*
 * Append one karaoke step to the motion km.
 *
 * The step is a single point static curve lasting dt. Its first coordinate
 * is the number of UTF-8 characters found in the first len bytes of str,
 * minus 0.5 (i.e. the middle of the last of those glyphs); the second
 * coordinate is 0.
 *
 * Nothing is added, and a warning is printed, when dt is negative, when
 * str cannot be decoded, or when memory runs out. In all those cases km is
 * left in a consistent state (ncurves still matches the arrays).
 * line is only used in the warnings.
 */
static void add_kate_karaoke_tag(kate_motion *km,kate_float dt,const char *str,size_t len,int line)
{
  kate_curve *kc;
  kate_float *pts;
  kate_curve **curves;
  kate_float *durations;
  kate_float ptr=(kate_float)-0.5;
  size_t ncurves;
  int ret;

  if (dt<0) {
    fprintf(stderr, _("WARNING - line %d: lyrics times must not be decreasing\n"), line);
    return;
  }

  /* work out how many glyphs we have */
  while (len>0) {
    ret=kate_text_get_character(kate_utf8,&str,&len);
    if (ret<0) {
      fprintf(stderr, _("WARNING - line %d: failed to get UTF-8 glyph from string\n"), line);
      return;
    }
    ptr+=(kate_float)1.0;
  }
  /* ptr now points to the middle of the glyph we're at */

  /* get all the memory first, and only commit once everything succeeded */
  ncurves=km->ncurves+1;
  kc=(kate_curve*)malloc(sizeof(kate_curve));
  pts=(kate_float*)malloc(2*sizeof(kate_float));
  curves=(kate_curve**)realloc(km->curves,ncurves*sizeof(kate_curve*));
  if (curves) km->curves=curves; /* a grown array stays valid for the old count */
  durations=(kate_float*)realloc(km->durations,ncurves*sizeof(kate_float));
  if (durations) km->durations=durations;

  if (!kc || !pts || !curves || !durations) {
    free(pts);
    free(kc);
    fprintf(stderr, _("WARNING: failed to allocate memory - enhanced LRC tag will be ignored\n"));
    return;
  }

  kate_curve_init(kc);
  kc->type=kate_curve_static;
  kc->npts=1;
  kc->pts=pts;
  kc->pts[0]=ptr;
  kc->pts[1]=(kate_float)0;

  km->curves[ncurves-1]=kc;
  km->durations[ncurves-1]=dt;
  km->ncurves=ncurves;
}
239 
/*
 * Scale a fractional-seconds value written with `digits` digits to
 * milliseconds (three digits): e.g. 5 written with 1 digit gives 500, and
 * 1234 written with 4 digits gives 123 (extra digits are truncated).
 */
static int fraction_to_milliseconds(int fraction,int digits)
{
  int ms = fraction;
  int n;

  /* fewer than three digits: pad with zeros on the right */
  for (n = digits; n < 3; ++n) {
    ms *= 10;
  }
  /* more than three digits: drop the least significant ones */
  for (n = digits; n > 3; --n) {
    ms /= 10;
  }
  return ms;
}
252 
process_enhanced_lrc_tags(char * str,kate_float start_time,kate_float end_time,int line)253 static kate_motion *process_enhanced_lrc_tags(char *str,kate_float start_time,kate_float end_time,int line)
254 {
255   char *start,*end;
256   int ret;
257   int m,s,fs;
258   kate_motion *km=NULL;
259   kate_float current_time = start_time;
260   int f0,f1;
261 
262   if (!str) return NULL;
263 
264   start=str;
265   while (1) {
266     start=strchr(start,'<');
267     if (!start) break;
268     end=strchr(start+1,'>');
269     if (!end) break;
270 
271     /* we found a <> pair, parse it */
272     f0=f1=-1;
273     ret=sscanf(start,"<%d:%d.%n%d%n>",&m,&s,&f0,&fs,&f1);
274 
275     /* remove the <> tag from input to get raw text */
276     memmove(start,end+1,strlen(end+1)+1);
277 
278     if (ret<3 || (f0|f1)<0 || f0>=f1 || (m|s|fs)<0) {
279       fprintf(stderr, _("WARNING - line %d: failed to process enhanced LRC tag (%*.*s) - ignored\n"),line,(int)(end-start+1),(int)(end-start+1),start);
280     }
281     else {
282       kate_float tag_time=hmsms2s(0,m,s,fraction_to_milliseconds(fs,f1-f0));
283 
284       /* if this is the first tag in this line, create a kate motion */
285       if (!km) {
286         km=(kate_motion*)malloc(sizeof(kate_motion));
287         if (!km) {
288           fprintf(stderr, _("WARNING: failed to allocate memory - enhanced LRC tag will be ignored\n"));
289         }
290         else {
291           kate_motion_init(km);
292           km->semantics=kate_motion_semantics_glyph_pointer_1;
293         }
294       }
295       /* add to the kate motion */
296       if (km) {
297         add_kate_karaoke_tag(km,tag_time-current_time,str,start-str,line);
298         current_time = tag_time;
299       }
300     }
301   }
302 
303   /* if we've found karaoke info, extend the motion to the end time */
304   if (km) {
305     add_kate_karaoke_tag(km,end_time-current_time,str,strlen(str),line);
306   }
307 
308   return km;
309 }
310 
load_lrc_lyrics(FILE * f)311 static oe_lyrics *load_lrc_lyrics(FILE *f)
312 {
313   oe_lyrics *lyrics;
314   static char str[4096];
315   static char lyrics_line[4096]="";
316   int m,s,fs;
317   double t,start_time = -1.0;
318   int offset;
319   int ret;
320   unsigned line=0;
321   kate_motion *km;
322   int f0,f1;
323 
324   if (!f) return NULL;
325 
326   /* skip headers */
327   fgets2(str,sizeof(str),f);
328   ++line;
329   while (!feof(f)) {
330     ret = sscanf(str, "[%d:%d.%d]%n\n",&m,&s,&fs,&offset);
331     if (ret >= 3)
332       break;
333     fgets2(str,sizeof(str),f);
334     ++line;
335   }
336   if (feof(f)) {
337     fprintf(stderr,_("ERROR - line %u: Syntax error: %s\n"),line,str);
338     return NULL;
339   }
340 
341   lyrics=(oe_lyrics*)malloc(sizeof(oe_lyrics));
342   if (!lyrics) return NULL;
343   lyrics->count = 0;
344   lyrics->lyrics = NULL;
345   lyrics->karaoke = 0;
346 
347   while (!feof(f)) {
348     /* ignore empty lines */
349     if (!is_line_empty(str)) {
350       f0=f1=-1;
351       ret=sscanf(str, "[%d:%d.%n%d%n]%n\n",&m,&s,&f0,&fs,&f1,&offset);
352       if (ret<3 || (f0|f1)<0 || f1<=f0 || (m|s|fs)<0) {
353         fprintf(stderr,_("ERROR - line %u: Syntax error: %s\n"),line,str);
354         free_lyrics(lyrics);
355         return NULL;
356       }
357       t=hmsms2s(0,m,s,fraction_to_milliseconds(fs,f1-f0));
358 
359       if (start_time>=0.0 && !is_line_empty(lyrics_line)) {
360         km=process_enhanced_lrc_tags(lyrics_line,start_time,t,line);
361         if (km) {
362           lyrics->karaoke = 1;
363         }
364         if (add_lyrics(lyrics,lyrics_line,km,start_time,t) < 0) {
365           free_lyrics(lyrics);
366           return NULL;
367         }
368       }
369 
370       strncpy(lyrics_line,str+offset,sizeof(lyrics_line));
371       lyrics_line[sizeof(lyrics_line)-1]=0;
372       start_time=t;
373     }
374 
375     fgets2(str,sizeof(str),f);
376     ++line;
377   }
378 
379   return lyrics;
380 }
381 
382 /* very weak checks, but we only support two formats, so it's ok */
probe_lyrics_format(FILE * f)383 lyrics_format probe_lyrics_format(FILE *f)
384 {
385   int dummy_int;
386   static char str[4096];
387   lyrics_format format=lf_unknown;
388   long pos;
389 
390   if (!f) return lf_unknown;
391 
392   pos=ftell(f);
393   fgets2(str,sizeof(str),f);
394 
395   /* srt */
396   if (sscanf(str, "%d\n", &dummy_int) == 1 && dummy_int>=0)
397     format=lf_srt;
398 
399   /* lrc */
400   if (str[0] == '[')
401     format=lf_lrc;
402 
403   fseek(f,pos,SEEK_SET);
404 
405   return format;
406 }
407 
408 #endif
409 
load_lyrics(const char * filename)410 oe_lyrics *load_lyrics(const char *filename)
411 {
412 #ifdef HAVE_KATE
413   static char str[4096];
414   int ret;
415   oe_lyrics *lyrics=NULL;
416   FILE *f;
417 
418   if (!filename) {
419       fprintf(stderr,_("ERROR: No lyrics filename to load from\n"));
420       return NULL;
421   }
422 
423   f = fopen(filename, "r");
424   if (!f) {
425       fprintf(stderr,_("ERROR: Failed to open lyrics file %s (%s)\n"), filename, strerror(errno));
426       return NULL;
427   }
428 
429   /* first, check for a BOM */
430   ret=fread(str,1,3,f);
431   if (ret<3 || memcmp(str,"\xef\xbb\xbf",3)) {
432     /* No BOM, rewind */
433     fseek(f,0,SEEK_SET);
434   }
435 
436   switch (probe_lyrics_format(f)) {
437     case lf_srt:
438       lyrics = load_srt_lyrics(f);
439       break;
440     case lf_lrc:
441       lyrics = load_lrc_lyrics(f);
442       break;
443     default:
444       fprintf(stderr, _("ERROR: Failed to load %s - can't determine format\n"), filename);
445       break;
446   }
447 
448   fclose(f);
449 
450   return lyrics;
451 #else
452   return NULL;
453 #endif
454 }
455 
/*
 * Release lyrics and everything it owns: the text of every item, each
 * item's karaoke motion (its curves, their points and its durations), the
 * item array and the structure itself. A NULL lyrics is accepted.
 * Does nothing when built without HAVE_KATE.
 */
void free_lyrics(oe_lyrics *lyrics)
{
#ifdef HAVE_KATE
    size_t i;

    if (lyrics == NULL) return;

    for (i=0; i<lyrics->count; ++i) {
      kate_motion *motion = lyrics->lyrics[i].km;

      free(lyrics->lyrics[i].text);
      if (motion != NULL) {
        size_t k;

        for (k=0; k<motion->ncurves; ++k) {
          kate_curve *curve = motion->curves[k];
          free(curve->pts);
          free(curve);
        }
        free(motion->curves);
        free(motion->durations);
        free(motion);
      }
    }
    free(lyrics->lyrics);
    free(lyrics);
#endif
}
479 
get_lyrics(const oe_lyrics * lyrics,double t,size_t * idx)480 const oe_lyrics_item *get_lyrics(const oe_lyrics *lyrics, double t, size_t *idx)
481 {
482 #ifdef HAVE_KATE
483     if (!lyrics || *idx>=lyrics->count) return NULL;
484     if (lyrics->lyrics[*idx].t0 > t) return NULL;
485     return &lyrics->lyrics[(*idx)++];
486 #else
487     return NULL;
488 #endif
489 }
490