1 /*
2 * GPAC - Multimedia Framework C SDK
3 *
4 * Authors: Jean Le Feuvre
5 * Copyright (c) Telecom ParisTech 2000-2020
6 * All rights reserved
7 *
8 * This file is part of GPAC / text import filter
9 *
10 * GPAC is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published by
12 * the Free Software Foundation; either version 2, or (at your option)
13 * any later version.
14 *
15 * GPAC is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; see the file COPYING. If not, write to
22 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 */
25
26
27
28 #include <gpac/filters.h>
29 #include <gpac/constants.h>
30 #include <gpac/utf.h>
31 #include <gpac/xml.h>
32 #include <gpac/token.h>
33 #include <gpac/color.h>
34 #include <gpac/internal/media_dev.h>
35 #include <gpac/internal/isomedia_dev.h>
36
37 #ifndef GPAC_DISABLE_SWF_IMPORT
38 /* SWF Importer */
39 #include <gpac/internal/swf_dev.h>
40 #endif
41
42 #ifndef GPAC_DISABLE_ISOM_WRITE
43
44 typedef struct __txtin_ctx GF_TXTIn;
45
46 struct __txtin_ctx
47 {
48 //opts
49 u32 width, height, txtx, txty, fontsize;
50 s32 zorder;
51 const char *fontname, *lang;
52 Bool nodefbox, noflush, webvtt;
53 u32 timescale;
54 GF_Fraction fps;
55
56
57 GF_FilterPid *ipid, *opid;
58 const char *file_name;
59 u32 fmt;
60 u32 playstate;
61 //0: not seeking, 1: seek request pending, 2: seek configured, discarding packets up until start_range
62 u32 seek_state;
63 Double start_range;
64
65 Bool is_setup;
66
67 GF_Err (*text_process)(GF_Filter *filter, GF_TXTIn *ctx);
68
69 s32 unicode_type;
70
71 FILE *src;
72
73 GF_BitStream *bs_w;
74 Bool first_samp;
75 Bool hdr_parsed;
76
77 //state vars for srt
78 u32 state, default_color;
79 GF_TextSample *samp;
80 u64 start, end, prev_end;
81 u32 curLine;
82 GF_StyleRecord style;
83
84 //WebVTT state
85 GF_WebVTTParser *vttparser;
86
87 //TTXT state
88 GF_DOMParser *parser;
89 u32 cur_child_idx, nb_children, last_desc_idx;
90 GF_List *text_descs;
91 Bool last_sample_empty;
92 u64 last_sample_duration;
93 //TTML state is the same as ttxt plus the timescale and start (webvtt) for cts compute
94 u32 txml_timescale;
95
96 //TTML state
97 GF_XMLNode *root_working_copy, *div_node, *sample_list_node;
98 GF_DOMParser *parser_working_copy;
99 Bool non_compliant_ttml;
100 u32 nb_p_found;
101
102
103
104 #ifndef GPAC_DISABLE_SWF_IMPORT
105 //SWF text
106 SWFReader *swf_parse;
107 Bool do_suspend;
108 #endif
109
110
111 };
112
113
/* Source formats handled by the text importer; stored in GF_TXTIn::fmt. */
enum
{
	GF_TXTIN_MODE_NONE    = 0,
	GF_TXTIN_MODE_SRT     = 1,
	GF_TXTIN_MODE_SUB     = 2,
	GF_TXTIN_MODE_TTXT    = 3,
	GF_TXTIN_MODE_TEXML   = 4,
	GF_TXTIN_MODE_WEBVTT  = 5,
	GF_TXTIN_MODE_TTML    = 6,
	GF_TXTIN_MODE_SWF_SVG = 7,
};
125
/* Removes from the end of __str every trailing character that appears in __sep.
   __str must be a writable NUL-terminated string; it is evaluated several times,
   so it must not have side effects.
   The expansion is a complete while-loop statement: call sites in this file
   invoke it without a trailing semicolon. Both arguments are parenthesized in
   the expansion so that expressions such as (buf + offset) expand correctly. */
#define REM_TRAIL_MARKS(__str, __sep) while (1) { \
		size_t _len = strlen(__str); \
		if (!_len) break; \
		_len--; \
		if (strchr((__sep), (__str)[_len])) (__str)[_len] = 0; \
		else break; \
	}

134
gf_text_get_utf_type(FILE * in_src)135 s32 gf_text_get_utf_type(FILE *in_src)
136 {
137 u32 read;
138 unsigned char BOM[5];
139 read = (u32) gf_fread(BOM, 5, in_src);
140 if ((s32) read < 1)
141 return -1;
142
143 if ((BOM[0]==0xFF) && (BOM[1]==0xFE)) {
144 /*UTF32 not supported*/
145 if (!BOM[2] && !BOM[3]) return -1;
146 gf_fseek(in_src, 2, SEEK_SET);
147 return 3;
148 }
149 if ((BOM[0]==0xFE) && (BOM[1]==0xFF)) {
150 /*UTF32 not supported*/
151 if (!BOM[2] && !BOM[3]) return -1;
152 gf_fseek(in_src, 2, SEEK_SET);
153 return 2;
154 } else if ((BOM[0]==0xEF) && (BOM[1]==0xBB) && (BOM[2]==0xBF)) {
155 gf_fseek(in_src, 3, SEEK_SET);
156 return 1;
157 }
158 if (BOM[0]<0x80) {
159 gf_fseek(in_src, 0, SEEK_SET);
160 return 0;
161 }
162 return -1;
163 }
/* Progress callback handed to gf_xml_dom_parse(): records the reported total
   (count) in the filter context. cur_samp is not used. */
static void ttxt_dom_progress(void *cbk, u64 cur_samp, u64 count)
{
	GF_TXTIn *txt_ctx = (GF_TXTIn *) cbk;

	txt_ctx->end = count;
}
169
gf_text_guess_format(const char * filename,u32 * fmt)170 static GF_Err gf_text_guess_format(const char *filename, u32 *fmt)
171 {
172 char szLine[2048];
173 u32 val;
174 s32 uni_type;
175 FILE *test = gf_fopen(filename, "rb");
176 if (!test) return GF_URL_ERROR;
177 uni_type = gf_text_get_utf_type(test);
178
179 if (uni_type>1) {
180 const u16 *sptr;
181 char szUTF[1024];
182 u32 read = (u32) gf_fread(szUTF, 1023, test);
183 if ((s32) read < 0) {
184 gf_fclose(test);
185 return GF_IO_ERR;
186 }
187 szUTF[read]=0;
188 sptr = (u16*)szUTF;
189 /*read = (u32) */gf_utf8_wcstombs(szLine, read, &sptr);
190 } else {
191 val = (u32) gf_fread(szLine, 1024, test);
192 if ((s32) val<0) return GF_IO_ERR;
193
194 szLine[val]=0;
195 }
196 REM_TRAIL_MARKS(szLine, "\r\n\t ")
197
198 *fmt = GF_TXTIN_MODE_NONE;
199 if ((szLine[0]=='{') && strstr(szLine, "}{")) *fmt = GF_TXTIN_MODE_SUB;
200 else if (szLine[0] == '<') {
201 char *ext = gf_file_ext_start(filename);
202 if (!strnicmp(ext, ".ttxt", 5)) *fmt = GF_TXTIN_MODE_TTXT;
203 else if (!strnicmp(ext, ".ttml", 5)) *fmt = GF_TXTIN_MODE_TTML;
204 ext = strstr(szLine, "?>");
205 if (ext) ext += 2;
206 if (ext && !ext[0]) {
207 if (!gf_fgets(szLine, 2048, test))
208 szLine[0] = '\0';
209 }
210 if (strstr(szLine, "x-quicktime-tx3g") || strstr(szLine, "text3GTrack")) *fmt = GF_TXTIN_MODE_TEXML;
211 else if (strstr(szLine, "TextStream")) *fmt = GF_TXTIN_MODE_TTXT;
212 else if (strstr(szLine, "tt")) *fmt = GF_TXTIN_MODE_TTML;
213 }
214 else if (strstr(szLine, "WEBVTT") )
215 *fmt = GF_TXTIN_MODE_WEBVTT;
216 else if (strstr(szLine, " --> ") )
217 *fmt = GF_TXTIN_MODE_SRT; /* might want to change the default to WebVTT */
218
219 else if (!strncmp(szLine, "FWS", 3) || !strncmp(szLine, "CWS", 3))
220 *fmt = GF_TXTIN_MODE_SWF_SVG;
221
222 gf_fclose(test);
223 return GF_OK;
224 }
225
226
227
/*
 * Reads one line from txt_in into szLine (capacity lineSize bytes) and
 * converts it in place to UTF-8 according to unicode_type, as returned by
 * gf_text_get_utf_type():
 *   0    no BOM: bytes >= 0x80 that do not start a UTF-8 sequence are expanded
 *        to two-byte UTF-8 sequences, valid UTF-8 sequences are kept
 *   1    UTF-8: copied unchanged
 *   2/3  UTF-16: byte-swapped when needed, then converted to UTF-8
 *
 * The converted line is truncated (with a warning) if it does not fit either
 * szLine or the internal conversion buffer.
 *
 * Returns szLine on success, NULL when no line could be read or when
 * szLine/lineSize is unusable.
 */
char *gf_text_get_utf8_line(char *szLine, u32 lineSize, FILE *txt_in, s32 unicode_type)
{
	u32 i, j, len, max_out;
	size_t conv_len;
	char *sOK;
	char szLineConv[2048];
	unsigned short *sptr;

	if (!szLine || !lineSize) return NULL;

	memset(szLine, 0, sizeof(char)*lineSize);
	sOK = gf_fgets(szLine, lineSize, txt_in);
	if (!sOK) return NULL;

	/* largest number of bytes (terminator excluded) that fits both buffers */
	max_out = (u32) sizeof(szLineConv) - 1;
	if (max_out > lineSize - 1) max_out = lineSize - 1;

	if (unicode_type<=1) {
		j=0;
		len = (u32) strlen(szLine);
		for (i=0; i<len; i++) {
			Bool expand = GF_FALSE;
			/* number of source bytes copied verbatim for this character */
			u32 nb_copy = 1;

			if (!unicode_type && (szLine[i] & 0x80)) {
				/*non UTF8 (likely some win-CP)*/
				if ((szLine[i+1] & 0xc0) != 0x80) expand = GF_TRUE;
				/*UTF8 2 bytes char*/
				else if ( (szLine[i] & 0xe0) == 0xc0) nb_copy = 2;
				/*UTF8 3 bytes char*/
				else if ( (szLine[i] & 0xf0) == 0xe0) nb_copy = 3;
				/*UTF8 4 bytes char*/
				else if ( (szLine[i] & 0xf8) == 0xf0) nb_copy = 4;
				else {
					i+=1;
					continue;
				}
			}
			/* a sequence cut short by the end of the line must not read past the terminator */
			if (nb_copy > len - i) nb_copy = len - i;

			if (j + (expand ? 2 : nb_copy) > max_out) {
				GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Text line too long for UTF-8 conversion, truncating\n"));
				break;
			}

			if (expand) {
				szLineConv[j] = 0xc0 | ( (szLine[i] >> 6) & 0x3 );
				j++;
				szLine[i] &= 0xbf;
				szLineConv[j] = szLine[i];
				j++;
			} else {
				memcpy(szLineConv + j, szLine + i, nb_copy);
				j += nb_copy;
				/* the loop increment accounts for the last copied byte */
				i += nb_copy - 1;
			}
		}
		szLineConv[j] = 0;
		strcpy(szLine, szLineConv);
		return sOK;
	}

#ifdef GPAC_BIG_ENDIAN
	if (unicode_type==3) {
#else
	if (unicode_type==2) {
#endif
		i=0;
		/* never look beyond the caller's buffer while searching the 16-bit terminator */
		while (i + 1 < lineSize) {
			char c;
			if (!szLine[i] && !szLine[i+1]) break;
			c = szLine[i+1];
			szLine[i+1] = szLine[i];
			szLine[i] = c;
			i+=2;
		}
	}
	sptr = (u16 *)szLine;
	conv_len = gf_utf8_wcstombs(szLineConv, max_out, (const unsigned short **) &sptr);
	if (conv_len == (size_t) -1) {
		GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Invalid UTF-16 data in text line, ignoring line content\n"));
		conv_len = 0;
	}
	szLineConv[conv_len] = 0;
	strcpy(szLine, szLineConv);
	/*this is ugly indeed: since input is UTF16-LE, there are many chances the gf_fgets never reads the \0 after a \n*/
	if (unicode_type==3) gf_fgetc(txt_in);
	return sOK;
}
311
312
313 static void txtin_probe_duration(GF_TXTIn *ctx)
314 {
315 GF_Fraction64 dur;
316 dur.num = 0;
317
318 if (ctx->fmt == GF_TXTIN_MODE_SWF_SVG) {
319 #ifndef GPAC_DISABLE_SWF_IMPORT
320 u32 frame_count, frame_rate;
321 gf_swf_get_duration(ctx->swf_parse, &frame_rate, &frame_count);
322 if (frame_count) {
323 GF_Fraction64 tdur;
324 tdur.num = frame_count;
325 tdur.den = frame_rate;
326 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DURATION, &PROP_FRAC64(tdur));
327 }
328 #endif
329 return;
330 }
331 if ((ctx->fmt == GF_TXTIN_MODE_SRT) || (ctx->fmt == GF_TXTIN_MODE_WEBVTT) || (ctx->fmt == GF_TXTIN_MODE_SUB)) {
332 u64 pos = gf_ftell(ctx->src);
333 gf_fseek(ctx->src, 0, SEEK_SET);
334 while (!gf_feof(ctx->src)) {
335 u64 end;
336 char szLine[2048];
337 char *sOK = gf_text_get_utf8_line(szLine, 2048, ctx->src, ctx->unicode_type);
338 if (!sOK) break;
339 REM_TRAIL_MARKS(szLine, "\r\n\t ")
340
341 if (ctx->fmt == GF_TXTIN_MODE_SUB) {
342 char szText[2048];
343 u32 sframe, eframe;
344 if (sscanf(szLine, "{%d}{%d}%2047s", &sframe, &eframe, szText) == 3) {
345 if (ctx->fps.den)
346 end = 1000 * eframe * ctx->fps.num / ctx->fps.den;
347 else
348 end = 1000 * eframe / 25;
349 if (end > (u64) dur.num) dur.num = (s64) end;
350 }
351 } else {
352 u32 eh, em, es, ems;
353 char *start = strstr(szLine, "-->");
354 if (!start) continue;
355 while (start[0] && ((start[0] == ' ') || (start[0] == '\t'))) start++;
356
357 if (sscanf(start, "%u:%u:%u,%u", &eh, &em, &es, &ems) != 4) {
358 eh = 0;
359 if (sscanf(szLine, "%u:%u,%u", &em, &es, &ems) != 3) {
360 continue;
361 }
362 }
363 end = (3600*eh + 60*em + es)*1000 + ems;
364 if (end > (u64) dur.num) dur.num = (s64) end;
365 }
366 }
367 gf_fseek(ctx->src, pos, SEEK_SET);
368 if (dur.num) {
369 dur.den = 1000;
370 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DURATION, &PROP_FRAC64(dur));
371 }
372 return;
373 }
374 if ((ctx->fmt == GF_TXTIN_MODE_TTXT) || (ctx->fmt == GF_TXTIN_MODE_TEXML)) {
375 u32 i=0;
376 GF_XMLNode *node, *root = gf_xml_dom_get_root(ctx->parser);
377 while ((node = gf_list_enum(root->content, &i))) {
378 u32 j;
379 u64 duration;
380 GF_XMLAttribute *att;
381 if (node->type) {
382 continue;
383 }
384 /*sample text*/
385 if ((ctx->fmt == GF_TXTIN_MODE_TTXT) && strcmp(node->name, "TextSample")) continue;
386 else if ((ctx->fmt == GF_TXTIN_MODE_TEXML) && strcmp(node->name, "sample")) continue;
387
388
389 j=0;
390 while ( (att=(GF_XMLAttribute*)gf_list_enum(node->attributes, &j))) {
391 u32 h, m, s, ms;
392 u64 ts=0;
393 if (ctx->fmt == GF_TXTIN_MODE_TTXT) {
394 if (strcmp(att->name, "sampleTime")) continue;
395
396 if (sscanf(att->value, "%u:%u:%u.%u", &h, &m, &s, &ms) == 4) {
397 ts = (h*3600 + m*60 + s)*1000 + ms;
398 } else {
399 ts = (u32) (atof(att->value) * 1000);
400 }
401 if (ts > (u64) dur.num) dur.num = (s64) ts;
402 } else {
403 if (strcmp(att->name, "duration")) continue;
404 duration = atoi(att->value);
405 dur.num += (s32) ( (1000 * duration) / ctx->txml_timescale);
406 }
407 }
408 }
409 if (dur.num) {
410 dur.den = 1000;
411 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DURATION, &PROP_FRAC64(dur));
412 }
413 return;
414 }
415
416 if (ctx->fmt == GF_TXTIN_MODE_TTML) {
417 u32 i=0;
418 GF_XMLNode *node, *p_node;
419
420 while ((node = gf_list_enum(ctx->div_node->content, &i))) {
421 GF_XMLAttribute *att;
422 u32 h, m, s, ms, p_idx=0;
423 u64 ts_end=0;
424 h = m = s = ms = 0;
425 while ( (att = (GF_XMLAttribute*)gf_list_enum(node->attributes, &p_idx))) {
426 if (strcmp(att->name, "end")) continue;
427
428 if (sscanf(att->value, "%u:%u:%u.%u", &h, &m, &s, &ms) == 4) {
429 ts_end = (h*3600 + m*60+s)*1000+ms;
430 } else if (sscanf(att->value, "%u:%u:%u", &h, &m, &s) == 3) {
431 ts_end = (h*3600 + m*60+s)*1000;
432 }
433 }
434 //or under a <span>
435 p_idx = 0;
436 while ( (p_node = (GF_XMLNode*)gf_list_enum(node->content, &p_idx))) {
437 u32 span_idx = 0;
438 while ( (att = (GF_XMLAttribute*)gf_list_enum(p_node->attributes, &span_idx))) {
439 if (strcmp(att->name, "end")) continue;
440 if (sscanf(att->value, "%u:%u:%u.%u", &h, &m, &s, &ms) == 4) {
441 ts_end = (h*3600 + m*60+s)*1000+ms;
442 } else if (sscanf(att->value, "%u:%u:%u", &h, &m, &s) == 3) {
443 ts_end = (h*3600 + m*60+s)*1000;
444 }
445 }
446 }
447 if (ts_end > (u64) dur.num) dur.num = (s64) ts_end;
448 }
449 if (dur.num) {
450 dur.den = 1000;
451 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DURATION, &PROP_FRAC64(dur));
452 }
453 return;
454 }
455 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Duration probing not supported for format %d\n", ctx->fmt));
456 }
457
458 static GF_Err txtin_setup_srt(GF_Filter *filter, GF_TXTIn *ctx)
459 {
460 u32 ID, OCR_ES_ID, dsi_len, file_size;
461 u8 *dsi;
462 GF_TextSampleDescriptor *sd;
463
464 ctx->src = gf_fopen(ctx->file_name, "rt");
465 if (!ctx->src) return GF_URL_ERROR;
466
467 file_size = (u32) gf_fsize(ctx->src);
468
469 ctx->unicode_type = gf_text_get_utf_type(ctx->src);
470 if (ctx->unicode_type<0) {
471 gf_fclose(ctx->src);
472 ctx->src = NULL;
473 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Unsupported SRT UTF encoding\n"));
474 return GF_NOT_SUPPORTED;
475 }
476
477 if (!ctx->timescale) ctx->timescale = 1000;
478 OCR_ES_ID = ID = 0;
479
480 if (!ctx->opid) ctx->opid = gf_filter_pid_new(filter);
481 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STREAM_TYPE, &PROP_UINT(GF_STREAM_TEXT) );
482 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CODECID, &PROP_UINT(GF_CODECID_TX3G) );
483 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TIMESCALE, &PROP_UINT(ctx->timescale) );
484 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DOWN_SIZE, &PROP_LONGUINT(file_size) );
485
486 if (!ID) ID = 1;
487 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ID, &PROP_UINT(ID) );
488 if (OCR_ES_ID) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CLOCK_ID, &PROP_UINT(OCR_ES_ID) );
489 if (ctx->width) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_WIDTH, &PROP_UINT(ctx->width) );
490 if (ctx->height) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_HEIGHT, &PROP_UINT(ctx->height) );
491 if (ctx->zorder) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ZORDER, &PROP_SINT(ctx->zorder) );
492 if (ctx->lang) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_LANGUAGE, &PROP_STRING( ctx->lang) );
493
494 sd = (GF_TextSampleDescriptor*)gf_odf_desc_new(GF_ODF_TX3G_TAG);
495 sd->fonts = (GF_FontRecord*)gf_malloc(sizeof(GF_FontRecord));
496 sd->font_count = 1;
497 sd->fonts[0].fontID = 1;
498 sd->fonts[0].fontName = gf_strdup(ctx->fontname ? ctx->fontname : "Serif");
499 sd->back_color = 0x00000000; /*transparent*/
500 sd->default_style.fontID = 1;
501 sd->default_style.font_size = ctx->fontsize;
502 sd->default_style.text_color = 0xFFFFFFFF; /*white*/
503 sd->default_style.style_flags = 0;
504 sd->horiz_justif = 1; /*center of scene*/
505 sd->vert_justif = (s8) -1; /*bottom of scene*/
506
507 if (ctx->nodefbox) {
508 sd->default_pos.top = sd->default_pos.left = sd->default_pos.right = sd->default_pos.bottom = 0;
509 } else if ((sd->default_pos.bottom==sd->default_pos.top) || (sd->default_pos.right==sd->default_pos.left)) {
510 sd->default_pos.left = ctx->txtx;
511 sd->default_pos.top = ctx->txty;
512 sd->default_pos.right = ctx->width + sd->default_pos.left;
513 sd->default_pos.bottom = ctx->height + sd->default_pos.top;
514 }
515
516 /*store attribs*/
517 ctx->style = sd->default_style;
518 gf_odf_tx3g_write(sd, &dsi, &dsi_len);
519 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG, &PROP_DATA_NO_COPY(dsi, dsi_len) );
520
521 gf_odf_desc_del((GF_Descriptor *)sd);
522
523 ctx->default_color = ctx->style.text_color;
524 ctx->samp = gf_isom_new_text_sample();
525 ctx->state = 0;
526 ctx->end = ctx->prev_end = ctx->start = 0;
527 ctx->first_samp = GF_TRUE;
528 ctx->curLine = 0;
529
530 txtin_probe_duration(ctx);
531 return GF_OK;
532 }
533
534 static void txtin_process_send_text_sample(GF_TXTIn *ctx, GF_TextSample *txt_samp, u64 ts, u32 duration, Bool is_rap)
535 {
536 GF_FilterPacket *dst_pck;
537 u8 *pck_data;
538 u32 size;
539
540 if (ctx->seek_state==2) {
541 Double end = (Double) (ts+duration);
542 end /= 1000;
543 if (end < ctx->start_range) return;
544 ctx->seek_state = 0;
545 }
546
547 size = gf_isom_text_sample_size(txt_samp);
548
549 dst_pck = gf_filter_pck_new_alloc(ctx->opid, size, &pck_data);
550 gf_bs_reassign_buffer(ctx->bs_w, pck_data, size);
551 gf_isom_text_sample_write_bs(txt_samp, ctx->bs_w);
552
553 ts *= ctx->timescale;
554 ts /= 1000;
555 duration *= ctx->timescale;
556 duration /= 1000;
557
558 gf_filter_pck_set_sap(dst_pck, is_rap ? GF_FILTER_SAP_1 : GF_FILTER_SAP_NONE);
559 gf_filter_pck_set_cts(dst_pck, ts);
560 gf_filter_pck_set_duration(dst_pck, duration);
561
562 gf_filter_pck_send(dst_pck);
563 }
564
565 static GF_Err txtin_process_srt(GF_Filter *filter, GF_TXTIn *ctx)
566 {
567 u32 i;
568 u32 sh, sm, ss, sms, eh, em, es, ems, txt_line, char_len, char_line, j, rem_styles;
569 Bool set_start_char, set_end_char, rem_color;
570 u32 line, len;
571 char szLine[2048], szText[2048], *ptr;
572 unsigned short uniLine[5000], uniText[5000], *sptr;
573
574 if (!ctx->is_setup) {
575 ctx->is_setup = GF_TRUE;
576 return txtin_setup_srt(filter, ctx);
577 }
578 if (!ctx->opid) return GF_NOT_SUPPORTED;
579 if (!ctx->playstate) return GF_OK;
580 else if (ctx->playstate==2) return GF_EOS;
581
582 txt_line = 0;
583 set_start_char = set_end_char = GF_FALSE;
584 char_len = 0;
585
586 if (ctx->seek_state == 1) {
587 ctx->seek_state = 2;
588 gf_fseek(ctx->src, 0, SEEK_SET);
589 }
590
591 while (1) {
592 char *sOK = gf_text_get_utf8_line(szLine, 2048, ctx->src, ctx->unicode_type);
593
594 if (sOK) REM_TRAIL_MARKS(szLine, "\r\n\t ")
595
596 if (!sOK || !strlen(szLine)) {
597 u32 nb_empty = 1;
598 u32 pos = (u32) gf_ftell(ctx->src);
599 if (ctx->state) {
600 while (!gf_feof(ctx->src)) {
601 sOK = gf_text_get_utf8_line(szLine+nb_empty, 2048-nb_empty, ctx->src, ctx->unicode_type);
602 if (sOK) REM_TRAIL_MARKS((szLine+nb_empty), "\r\n\t ")
603
604 if (!sOK) {
605 gf_fseek(ctx->src, pos, SEEK_SET);
606 break;
607 } else if (!strlen(szLine+nb_empty)) {
608 nb_empty++;
609 continue;
610 } else if ( sscanf(szLine+nb_empty, "%u", &line) == 1) {
611 gf_fseek(ctx->src, pos, SEEK_SET);
612 break;
613 } else {
614 u32 k;
615 for (k=0; k<nb_empty; k++) szLine[k] = '\n';
616 goto force_line;
617 }
618 }
619 }
620 ctx->style.style_flags = 0;
621 ctx->style.startCharOffset = ctx->style.endCharOffset = 0;
622 if (txt_line) {
623 if (ctx->prev_end && (ctx->start != ctx->prev_end) && (ctx->state<=2)) {
624 GF_TextSample * empty_samp = gf_isom_new_text_sample();
625 txtin_process_send_text_sample(ctx, empty_samp, ctx->prev_end, (u32) (ctx->start - ctx->prev_end), GF_TRUE );
626 gf_isom_delete_text_sample(empty_samp);
627 }
628
629 if (ctx->state<=2) {
630 txtin_process_send_text_sample(ctx, ctx->samp, ctx->start, (u32) (ctx->end - ctx->start), GF_TRUE);
631 ctx->prev_end = ctx->end;
632 }
633 txt_line = 0;
634 char_len = 0;
635 set_start_char = set_end_char = GF_FALSE;
636 ctx->style.startCharOffset = ctx->style.endCharOffset = 0;
637 gf_isom_text_reset(ctx->samp);
638
639 gf_filter_pid_set_info(ctx->opid, GF_PROP_PID_DOWN_BYTES, &PROP_LONGUINT( gf_ftell(ctx->src )) );
640 }
641 ctx->state = 0;
642 if (!sOK) break;
643 continue;
644 }
645
646 force_line:
647 switch (ctx->state) {
648 case 0:
649 if (sscanf(szLine, "%u", &line) != 1) {
650 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Bad SRT formatting - expecting number got \"%s\"\n", szLine));
651 break;
652 }
653 if (line != ctx->curLine + 1) {
654 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Corrupted SRT frame %d after frame %d\n", line, ctx->curLine));
655 }
656 ctx->curLine = line;
657 ctx->state = 1;
658 break;
659 case 1:
660 if (sscanf(szLine, "%u:%u:%u,%u --> %u:%u:%u,%u", &sh, &sm, &ss, &sms, &eh, &em, &es, &ems) != 8) {
661 if (sscanf(szLine, "%u:%u:%u.%u --> %u:%u:%u.%u", &sh, &sm, &ss, &sms, &eh, &em, &es, &ems) != 8) {
662 sh = eh = 0;
663 if (sscanf(szLine, "%u:%u,%u --> %u:%u,%u", &sm, &ss, &sms, &em, &es, &ems) != 6) {
664 if (sscanf(szLine, "%u:%u.%u --> %u:%u.%u", &sm, &ss, &sms, &em, &es, &ems) != 6) {
665 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Error scanning SRT frame %d timing\n", ctx->curLine));
666 ctx->state = 0;
667 break;
668 }
669 }
670 }
671 }
672 ctx->start = (3600*sh + 60*sm + ss)*1000 + sms;
673 if (ctx->start < ctx->end) {
674 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Overlapping SRT frame %d - starts "LLD" ms is before end of previous one "LLD" ms - adjusting time stamps\n", ctx->curLine, ctx->start, ctx->end));
675 ctx->start = ctx->end;
676 }
677
678 ctx->end = (3600*eh + 60*em + es)*1000 + ems;
679 /*make stream start at 0 by inserting a fake AU*/
680 if (ctx->first_samp && (ctx->start > 0)) {
681 txtin_process_send_text_sample(ctx, ctx->samp, 0, (u32) ctx->start, GF_TRUE);
682 }
683 ctx->style.style_flags = 0;
684 ctx->state = 2;
685 if (ctx->end <= ctx->prev_end) {
686 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Overlapping SRT frame %d end "LLD" is at or before previous end "LLD" - removing\n", ctx->curLine, ctx->end, ctx->prev_end));
687 ctx->start = ctx->end;
688 ctx->state = 3;
689 }
690 break;
691
692 default:
693 /*reset only when text is present*/
694 ctx->first_samp = GF_FALSE;
695
696 /*go to line*/
697 if (txt_line) {
698 gf_isom_text_add_text(ctx->samp, "\n", 1);
699 char_len += 1;
700 }
701
702 ptr = (char *) szLine;
703 {
704 size_t _len = gf_utf8_mbstowcs(uniLine, 5000, (const char **) &ptr);
705 if (_len == (size_t) -1) {
706 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Invalid UTF data (line %d)\n", ctx->curLine));
707 ctx->state = 0;
708 }
709 len = (u32) _len;
710 }
711 i=j=0;
712 rem_styles = 0;
713 rem_color = 0;
714 while (i<len) {
715 u32 font_style = 0;
716 u32 style_nb_chars = 0;
717 u32 style_def_type = 0;
718
719 if ( (uniLine[i]=='<') && (uniLine[i+2]=='>')) {
720 style_nb_chars = 3;
721 style_def_type = 1;
722 }
723 else if ( (uniLine[i]=='<') && (uniLine[i+1]=='/') && (uniLine[i+3]=='>')) {
724 style_def_type = 2;
725 style_nb_chars = 4;
726 }
727 else if (uniLine[i]=='<') {
728 const unsigned short* src = uniLine + i;
729 size_t alen = gf_utf8_wcstombs(szLine, 2048, (const unsigned short**) & src);
730 szLine[alen] = 0;
731 strlwr(szLine);
732 if (!strncmp(szLine, "<font ", 6) ) {
733 char *a_sep = strstr(szLine, "color");
734 if (a_sep) a_sep = strchr(a_sep, '"');
735 if (a_sep) {
736 char *e_sep = strchr(a_sep+1, '"');
737 if (e_sep) {
738 e_sep[0] = 0;
739 font_style = gf_color_parse(a_sep+1);
740 e_sep[0] = '"';
741 e_sep = strchr(e_sep+1, '>');
742 if (e_sep) {
743 style_nb_chars = (u32) (1 + e_sep - szLine);
744 style_def_type = 1;
745 }
746 }
747
748 }
749 }
750 else if (!strncmp(szLine, "</font>", 7) ) {
751 style_nb_chars = 7;
752 style_def_type = 2;
753 font_style = 0xFFFFFFFF;
754 }
755 //skip unknown
756 else {
757 char *a_sep = strstr(szLine, ">");
758 if (a_sep) {
759 style_nb_chars = (u32) (a_sep - szLine);
760 i += style_nb_chars;
761 continue;
762 }
763 }
764
765 }
766
767 /*start of new style*/
768 if (style_def_type==1) {
769 /*store prev style*/
770 if (set_end_char) {
771 assert(set_start_char);
772 gf_isom_text_add_style(ctx->samp, &ctx->style);
773 set_end_char = set_start_char = GF_FALSE;
774 ctx->style.style_flags &= ~rem_styles;
775 rem_styles = 0;
776 if (rem_color) {
777 ctx->style.text_color = ctx->default_color;
778 rem_color = 0;
779 }
780 }
781 if (set_start_char && (ctx->style.startCharOffset != j)) {
782 ctx->style.endCharOffset = char_len + j;
783 if (ctx->style.style_flags) gf_isom_text_add_style(ctx->samp, &ctx->style);
784 }
785 switch (uniLine[i+1]) {
786 case 'b':
787 case 'B':
788 ctx->style.style_flags |= GF_TXT_STYLE_BOLD;
789 set_start_char = GF_TRUE;
790 ctx->style.startCharOffset = char_len + j;
791 break;
792 case 'i':
793 case 'I':
794 ctx->style.style_flags |= GF_TXT_STYLE_ITALIC;
795 set_start_char = GF_TRUE;
796 ctx->style.startCharOffset = char_len + j;
797 break;
798 case 'u':
799 case 'U':
800 ctx->style.style_flags |= GF_TXT_STYLE_UNDERLINED;
801 set_start_char = GF_TRUE;
802 ctx->style.startCharOffset = char_len + j;
803 break;
804 case 'f':
805 case 'F':
806 if (font_style) {
807 ctx->style.text_color = font_style;
808 set_start_char = GF_TRUE;
809 ctx->style.startCharOffset = char_len + j;
810 }
811 break;
812 }
813 i += style_nb_chars;
814 continue;
815 }
816
817 /*end of prev style*/
818 if (style_def_type==2) {
819 switch (uniLine[i+2]) {
820 case 'b':
821 case 'B':
822 rem_styles |= GF_TXT_STYLE_BOLD;
823 set_end_char = GF_TRUE;
824 ctx->style.endCharOffset = char_len + j;
825 break;
826 case 'i':
827 case 'I':
828 rem_styles |= GF_TXT_STYLE_ITALIC;
829 set_end_char = GF_TRUE;
830 ctx->style.endCharOffset = char_len + j;
831 break;
832 case 'u':
833 case 'U':
834 rem_styles |= GF_TXT_STYLE_UNDERLINED;
835 set_end_char = GF_TRUE;
836 ctx->style.endCharOffset = char_len + j;
837 break;
838 case 'f':
839 case 'F':
840 if (font_style) {
841 rem_color = 1;
842 set_end_char = GF_TRUE;
843 ctx->style.endCharOffset = char_len + j;
844 }
845 }
846 i+=style_nb_chars;
847 continue;
848 }
849 /*store style*/
850 if (set_end_char) {
851 gf_isom_text_add_style(ctx->samp, &ctx->style);
852 set_end_char = GF_FALSE;
853 set_start_char = GF_TRUE;
854 ctx->style.startCharOffset = char_len + j;
855 ctx->style.style_flags &= ~rem_styles;
856 rem_styles = 0;
857 ctx->style.text_color = ctx->default_color;
858 rem_color = 0;
859 }
860
861 uniText[j] = uniLine[i];
862 j++;
863 i++;
864 }
865 /*store last style*/
866 if (set_end_char) {
867 gf_isom_text_add_style(ctx->samp, &ctx->style);
868 set_end_char = GF_FALSE;
869 set_start_char = GF_TRUE;
870 ctx->style.startCharOffset = char_len + j;
871 ctx->style.style_flags &= ~rem_styles;
872 }
873
874 char_line = j;
875 uniText[j] = 0;
876
877 sptr = (u16 *) uniText;
878 len = (u32) gf_utf8_wcstombs(szText, 5000, (const u16 **) &sptr);
879
880 gf_isom_text_add_text(ctx->samp, szText, len);
881 char_len += char_line;
882 txt_line ++;
883 break;
884 }
885
886 if (gf_filter_pid_would_block(ctx->opid))
887 return GF_OK;
888 }
889
890 /*final flush*/
891 if (ctx->end && ! ctx->noflush) {
892 gf_isom_text_reset(ctx->samp);
893 txtin_process_send_text_sample(ctx, ctx->samp, ctx->end, 0, GF_TRUE);
894 ctx->end = 0;
895 }
896 gf_isom_text_reset(ctx->samp);
897
898 return GF_EOS;
899 }
900
901 /* Structure used to pass importer and track data to the parsers without exposing the GF_MediaImporter structure
902 used by WebVTT and Flash->SVG */
903 typedef struct {
904 GF_TXTIn *ctx;
905 u32 timescale;
906 u32 track;
907 u32 descriptionIndex;
908 } GF_ISOFlusher;
909
910 #ifndef GPAC_DISABLE_VTT
911
912 static GF_Err gf_webvtt_import_report(void *user, GF_Err e, char *message, const char *line)
913 {
914 GF_LOG(e ? GF_LOG_WARNING : GF_LOG_INFO, GF_LOG_AUTHOR, ("[TXTIn] WebVTT line %s: %s\n", line, message) );
915 return e;
916 }
917
918 static void gf_webvtt_import_header(void *user, const char *config)
919 {
920 GF_TXTIn *ctx = (GF_TXTIn *)user;
921 if (!ctx->hdr_parsed) {
922 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG, &PROP_DATA((char *) config, (u32) (1+strlen(config)) ) );
923 ctx->hdr_parsed = GF_TRUE;
924 gf_webvtt_parser_suspend(ctx->vttparser);
925 }
926 }
927
928 static void gf_webvtt_flush_sample(void *user, GF_WebVTTSample *samp)
929 {
930 u64 start, end;
931 GF_TXTIn *ctx = (GF_TXTIn *)user;
932 GF_ISOSample *s;
933
934 start = gf_webvtt_sample_get_start(samp);
935 end = gf_webvtt_sample_get_end(samp);
936
937 if (ctx->seek_state==2) {
938 Double tsend = (Double) end;
939 tsend /= 1000;
940 if (tsend<ctx->start_range) return;
941 ctx->seek_state = 0;
942 }
943
944 s = gf_isom_webvtt_to_sample(samp);
945 if (s) {
946 GF_FilterPacket *pck;
947 u8 *pck_data;
948
949 pck = gf_filter_pck_new_alloc(ctx->opid, s->dataLength, &pck_data);
950 memcpy(pck_data, s->data, s->dataLength);
951 gf_filter_pck_set_cts(pck, (u64) (ctx->timescale * start / 1000) );
952 gf_filter_pck_set_sap(pck, GF_FILTER_SAP_1);
953
954
955 if (end && (end>=start) ) {
956 gf_filter_pck_set_duration(pck, (u32) (ctx->timescale * (end-start) / 1000) );
957 }
958 gf_filter_pck_send(pck);
959
960 gf_isom_sample_del(&s);
961 }
962 gf_webvtt_sample_del(samp);
963
964 gf_filter_pid_set_info(ctx->opid, GF_PROP_PID_DOWN_BYTES, &PROP_LONGUINT( gf_ftell(ctx->src )) );
965
966 if (gf_filter_pid_would_block(ctx->opid))
967 gf_webvtt_parser_suspend(ctx->vttparser);
968
969 }
970
971 static GF_Err txtin_webvtt_setup(GF_Filter *filter, GF_TXTIn *ctx)
972 {
973 GF_Err e;
974 u32 ID, OCR_ES_ID, file_size, w, h;
975 Bool is_srt;
976 char *ext;
977
978 ctx->src = gf_fopen(ctx->file_name, "rt");
979 if (!ctx->src) return GF_URL_ERROR;
980
981 file_size = (u32) gf_fsize(ctx->src);
982
983 ctx->unicode_type = gf_text_get_utf_type(ctx->src);
984 if (ctx->unicode_type<0) {
985 gf_fclose(ctx->src);
986 ctx->src = NULL;
987 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Unsupported SRT UTF encoding\n"));
988 return GF_NOT_SUPPORTED;
989 }
990 ext = gf_file_ext_start(ctx->file_name);
991 is_srt = (ext && !strnicmp(ext, ".srt", 4)) ? GF_TRUE : GF_FALSE;
992
993
994 if (!ctx->timescale) ctx->timescale = 1000;
995 OCR_ES_ID = ID = 0;
996
997 if (!ctx->opid) ctx->opid = gf_filter_pid_new(filter);
998 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STREAM_TYPE, &PROP_UINT(GF_STREAM_TEXT) );
999 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CODECID, &PROP_UINT(GF_CODECID_WEBVTT) );
1000 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TIMESCALE, &PROP_UINT(ctx->timescale) );
1001 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DOWN_SIZE, &PROP_LONGUINT(file_size) );
1002
1003 w = ctx->width;
1004 h = ctx->height;
1005 if (!ID) ID = 1;
1006 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ID, &PROP_UINT(ID) );
1007 if (OCR_ES_ID) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CLOCK_ID, &PROP_UINT(OCR_ES_ID) );
1008 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_WIDTH, &PROP_UINT(w) );
1009 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_HEIGHT, &PROP_UINT(h) );
1010 if (ctx->zorder) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ZORDER, &PROP_SINT(ctx->zorder) );
1011 if (ctx->lang) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_LANGUAGE, &PROP_STRING( ctx->lang) );
1012
1013 ctx->vttparser = gf_webvtt_parser_new();
1014
1015 e = gf_webvtt_parser_init(ctx->vttparser, ctx->src, ctx->unicode_type, is_srt, ctx, gf_webvtt_import_report, gf_webvtt_flush_sample, gf_webvtt_import_header);
1016 if (e != GF_OK) {
1017 gf_webvtt_parser_del(ctx->vttparser);
1018 ctx->vttparser = NULL;
1019 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] WebVTT parser init error %s\n", gf_error_to_string(e) ));
1020 }
1021 //get the header
1022 e = gf_webvtt_parser_parse(ctx->vttparser);
1023
1024 txtin_probe_duration(ctx);
1025 return e;
1026 }
1027
1028 static GF_Err txtin_process_webvtt(GF_Filter *filter, GF_TXTIn *ctx)
1029 {
1030 GF_Err e;
1031
1032 if (!ctx->is_setup) {
1033 ctx->is_setup = GF_TRUE;
1034 return txtin_webvtt_setup(filter, ctx);
1035 }
1036 if (!ctx->vttparser) return GF_NOT_SUPPORTED;
1037 if (ctx->seek_state==1) {
1038 ctx->seek_state = 2;
1039 gf_webvtt_parser_restart(ctx->vttparser);
1040 }
1041
1042 e = gf_webvtt_parser_parse(ctx->vttparser);
1043 if (e < GF_OK) {
1044 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] WebVTT process error %s\n", gf_error_to_string(e) ));
1045 }
1046 return e;
1047 }
1048
1049 #endif /*GPAC_DISABLE_VTT*/
1050
/*
 * Rewrites str in place and returns it.
 *
 * Without strip_lines, every "\r\n" pair is reduced to "\n".
 * With strip_lines, a string starting with a single quote is read as a list of
 * quoted lines: the text of each quoted section is kept, sections are joined
 * with '\n' and everything outside the quotes is dropped. Inside a section, a
 * quote only closes it when it is the last character or is followed by a
 * space, tab, CR, LF or another quote; otherwise it is kept as text. Strings
 * that do not start with a quote are returned untouched.
 */
static char *ttxt_parse_string(char *str, Bool strip_lines)
{
	const u32 total = (u32) strlen(str);
	char *out = str;
	u32 pos = 0;
	u32 in_quote = 0;

	if (!strip_lines) {
		while (pos < total) {
			/* skip the CR of a CRLF pair */
			if ((str[pos] == '\r') && (str[pos+1] == '\n'))
				pos++;
			*out++ = str[pos++];
		}
		*out = 0;
		return str;
	}

	if (*str != '\'') return str;

	for (pos = 0; pos < total; pos++) {
		const char cur = str[pos];

		if (cur != '\'') {
			/* text outside quoted sections is dropped */
			if (in_quote) *out++ = cur;
			continue;
		}

		if (!in_quote) {
			/* opening quote: separate from the previous section, if any */
			if (out != str) *out++ = '\n';
			in_quote = 1;
			continue;
		}

		if (pos + 1 == total) {
			in_quote = 0;
			continue;
		}
		switch (str[pos+1]) {
		case ' ':
		case '\n':
		case '\r':
		case '\t':
		case '\'':
			/* closing quote */
			in_quote = 0;
			break;
		default:
			/* apostrophe inside the text */
			*out++ = cur;
			break;
		}
	}
	*out = 0;
	return str;
}
1098
1099 static void GF_TXTIN_MODE_ebu_ttd_remove_samples(GF_XMLNode *root, GF_XMLNode **sample_list_node)
1100 {
1101 u32 idx = 0;
1102 GF_XMLNode *node = NULL;
1103 *sample_list_node = NULL;
1104 while ( (node = (GF_XMLNode*)gf_list_enum(root->content, &idx))) {
1105 if (!strcmp(node->name, "body")) {
1106 GF_XMLNode *body_node;
1107 u32 body_idx = 0;
1108 while ( (body_node = (GF_XMLNode*)gf_list_enum(node->content, &body_idx))) {
1109 if (!strcmp(body_node->name, "div")) {
1110 u32 body_num;
1111 *sample_list_node = body_node;
1112 body_num = gf_list_count(body_node->content);
1113 while (body_num--) {
1114 GF_XMLNode *content_node = (GF_XMLNode*)gf_list_get(body_node->content, 0);
1115 assert(gf_list_find(body_node->content, content_node) == 0);
1116 gf_list_rem(body_node->content, 0);
1117 gf_xml_dom_node_del(content_node);
1118 }
1119 return;
1120 }
1121 }
1122 }
1123 }
1124 }
1125
1126 #define TTML_NAMESPACE "http://www.w3.org/ns/ttml"
1127
1128 static GF_Err gf_text_ttml_setup(GF_Filter *filter, GF_TXTIn *ctx)
1129 {
1130 GF_Err e;
1131 u32 i, nb_children, ID;
1132 u64 file_size;
1133 GF_XMLAttribute *att;
1134 GF_XMLNode *root, *node, *body_node;
1135 const char *lang = ctx->lang;
1136
1137
1138 ctx->is_setup = GF_TRUE;
1139 ctx->parser = gf_xml_dom_new();
1140 e = gf_xml_dom_parse(ctx->parser, ctx->file_name, ttxt_dom_progress, ctx);
1141 if (e) {
1142 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Error parsing TTML file: Line %d - %s. Abort.\n", gf_xml_dom_get_line(ctx->parser), gf_xml_dom_get_error(ctx->parser) ));
1143 ctx->is_setup = GF_TRUE;
1144 ctx->non_compliant_ttml = GF_TRUE;
1145 return e;
1146 }
1147 root = gf_xml_dom_get_root(ctx->parser);
1148 if (!root) {
1149 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Error parsing TTML file: no root XML element found. Abort.\n"));
1150 ctx->non_compliant_ttml = GF_TRUE;
1151 return GF_NON_COMPLIANT_BITSTREAM;
1152 }
1153
1154 /*look for TTML*/
1155 if (gf_xml_get_element_check_namespace(root, "tt", NULL) != GF_OK) {
1156 if (root->ns) {
1157 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("TTML file not recognized: root element is \"%s:%s\" (check your namespaces)\n", root->ns, root->name));
1158 } else {
1159 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("TTML file not recognized: root element is \"%s\"\n", root->name));
1160 }
1161 ctx->non_compliant_ttml = GF_TRUE;
1162 return GF_NOT_SUPPORTED;
1163 }
1164
1165 GF_LOG(GF_LOG_DEBUG, GF_LOG_PARSER, ("[TXTIn] TTML EBU-TTD detected\n"));
1166
1167 root = gf_xml_dom_get_root(ctx->parser);
1168
1169
1170 /*** root (including language) ***/
1171 i=0;
1172 while ( (att = (GF_XMLAttribute *)gf_list_enum(root->attributes, &i))) {
1173 GF_LOG(GF_LOG_DEBUG, GF_LOG_PARSER, ("[TTML] Found root attribute name %s, value %s\n", att->name, att->value));
1174
1175 if (!strcmp(att->name, "xmlns")) {
1176 if (strcmp(att->value, TTML_NAMESPACE)) {
1177 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML] XML Namespace %s not recognized, expecting %s\n", att->name, att->value, TTML_NAMESPACE));
1178 ctx->non_compliant_ttml = GF_TRUE;
1179 return GF_NON_COMPLIANT_BITSTREAM;
1180 }
1181 } else if (!strcmp(att->name, "xml:lang")) {
1182 lang = att->value;
1183 }
1184 }
1185
1186 //locate body
1187 nb_children = gf_list_count(root->content);
1188 body_node = NULL;
1189
1190 i=0;
1191 while ( (node = (GF_XMLNode*)gf_list_enum(root->content, &i))) {
1192 if (node->type) {
1193 nb_children--;
1194 continue;
1195 }
1196 e = gf_xml_get_element_check_namespace(node, "body", root->ns);
1197 if (e == GF_BAD_PARAM) {
1198 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] ignored \"%s\" node, check your namespaces\n", node->name));
1199 } else if (e == GF_OK) {
1200 if (body_node) {
1201 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML EBU-TTD] duplicated \"body\" element. Abort.\n"));
1202 ctx->non_compliant_ttml = GF_TRUE;
1203 return GF_NON_COMPLIANT_BITSTREAM;
1204 }
1205 body_node = node;
1206 }
1207 }
1208 if (!body_node) {
1209 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML EBU-TTD] \"body\" element not found. Abort.\n"));
1210 ctx->non_compliant_ttml = GF_TRUE;
1211 return GF_NON_COMPLIANT_BITSTREAM;
1212 }
1213
1214 i=0;
1215 while ( (node = (GF_XMLNode*)gf_list_enum(body_node->content, &i))) {
1216 if (node->type) {
1217 nb_children--;
1218 continue;
1219 }
1220 e = gf_xml_get_element_check_namespace(node, "div", root->ns);
1221 if (e == GF_BAD_PARAM) {
1222 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] ignored \"%s\" node, check your namespaces\n", node->name));
1223 } else if (e == GF_OK) {
1224 if (ctx->div_node) {
1225 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] several \"div\" found in document, only the first one will be imported - not supported but patch is welcome\n"));
1226 }
1227 ctx->div_node = node;
1228 }
1229 }
1230 file_size = ctx->end;
1231 if (!ctx->timescale) ctx->timescale = 1000;
1232
1233 if (!ctx->opid) ctx->opid = gf_filter_pid_new(filter);
1234 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STREAM_TYPE, &PROP_UINT(GF_STREAM_TEXT) );
1235 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CODECID, &PROP_UINT(GF_CODECID_SUBS_XML) );
1236 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TIMESCALE, &PROP_UINT(ctx->timescale) );
1237 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DOWN_SIZE, &PROP_LONGUINT(file_size) );
1238
1239 ID = 1;
1240 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ID, &PROP_UINT(ID) );
1241 if (ctx->width) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_WIDTH, &PROP_UINT(ctx->width) );
1242 if (ctx->height) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_HEIGHT, &PROP_UINT(ctx->height) );
1243 if (ctx->zorder) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ZORDER, &PROP_SINT(ctx->zorder) );
1244 if (lang) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_LANGUAGE, &PROP_STRING( lang) );
1245 gf_filter_pid_set_property_str(ctx->opid, "meta:xmlns", &PROP_STRING(TTML_NAMESPACE) );
1246
1247 /*** body ***/
1248 ctx->parser_working_copy = gf_xml_dom_new();
1249 e = gf_xml_dom_parse(ctx->parser_working_copy, ctx->file_name, NULL, NULL);
1250 assert (e == GF_OK);
1251 ctx->root_working_copy = gf_xml_dom_get_root(ctx->parser_working_copy);
1252 assert(ctx->root_working_copy);
1253
1254 /*remove all the sample entries (instances in body) entries from the working copy, we will add each sample in this clone DOM to create full XML of each sample*/
1255 GF_TXTIN_MODE_ebu_ttd_remove_samples(ctx->root_working_copy, &ctx->sample_list_node);
1256
1257 ctx->nb_children = gf_list_count(ctx->div_node->content);
1258 ctx->cur_child_idx = 0;
1259
1260 ctx->last_sample_duration = 0;
1261 ctx->end = 0;
1262 ctx->first_samp = GF_TRUE;
1263
1264 txtin_probe_duration(ctx);
1265
1266 return GF_OK;
1267 }
1268
1269 static GF_Err gf_text_process_ttml(GF_Filter *filter, GF_TXTIn *ctx)
1270 {
1271 GF_Err e;
1272 GF_XMLNode *root;
1273 char *samp_text=NULL;
1274
1275 if (!ctx->is_setup) return gf_text_ttml_setup(filter, ctx);
1276 if (ctx->non_compliant_ttml || !ctx->opid) return GF_NOT_SUPPORTED;
1277 if (!ctx->playstate) return GF_OK;
1278 else if (ctx->playstate==2) return GF_EOS;
1279
1280 if (ctx->seek_state==1) {
1281 ctx->seek_state = 2;
1282 ctx->cur_child_idx = 0;
1283 }
1284
1285 root = gf_xml_dom_get_root(ctx->parser);
1286
1287 for (; ctx->cur_child_idx < ctx->nb_children; ctx->cur_child_idx++) {
1288 GF_XMLNode *p_node;
1289 GF_XMLAttribute *p_att;
1290 u32 p_idx = 0, h, m, s, ms;
1291 s64 ts_begin = -1, ts_end = -1;
1292
1293 GF_XMLNode *div_child = (GF_XMLNode*)gf_list_get(ctx->div_node->content, ctx->cur_child_idx);
1294 if (div_child->type) {
1295 continue;
1296 }
1297 e = gf_xml_get_element_check_namespace(div_child, "p", root->ns);
1298 if (e == GF_BAD_PARAM) {
1299 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] ignored \"%s\" node, check your namespaces\n", div_child->name));
1300 continue;
1301 }
1302
1303 //sample is either in the <p> ...
1304 while ( (p_att = (GF_XMLAttribute*)gf_list_enum(div_child->attributes, &p_idx))) {
1305
1306 if (!strcmp(p_att->name, "begin")) {
1307 if (ts_begin != -1) {
1308 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML EBU-TTD] duplicated \"begin\" attribute. Abort.\n"));
1309 e = GF_NON_COMPLIANT_BITSTREAM;
1310 goto exit;
1311 }
1312 if (sscanf(p_att->value, "%u:%u:%u.%u", &h, &m, &s, &ms) == 4) {
1313 ts_begin = (h*3600 + m*60+s)*1000+ms;
1314 } else if (sscanf(p_att->value, "%u:%u:%u", &h, &m, &s) == 3) {
1315 ts_begin = (h*3600 + m*60+s)*1000;
1316 }
1317 } else if (!strcmp(p_att->name, "end")) {
1318 if (ts_end != -1) {
1319 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML EBU-TTD] duplicated \"end\" attribute. Abort.\n"));
1320 e = GF_NON_COMPLIANT_BITSTREAM;
1321 goto exit;
1322 }
1323 if (sscanf(p_att->value, "%u:%u:%u.%u", &h, &m, &s, &ms) == 4) {
1324 ts_end = (h*3600 + m*60+s)*1000+ms;
1325 } else if (sscanf(p_att->value, "%u:%u:%u", &h, &m, &s) == 3) {
1326 ts_end = (h*3600 + m*60+s)*1000;
1327 }
1328 }
1329 if ((ts_begin != -1) && (ts_end != -1) && !samp_text && ctx->sample_list_node) {
1330 e = gf_xml_dom_append_child(ctx->sample_list_node, div_child);
1331 assert(e == GF_OK);
1332 samp_text = gf_xml_dom_serialize((GF_XMLNode*)ctx->root_working_copy, GF_FALSE);
1333 e = gf_xml_dom_rem_child(ctx->sample_list_node, div_child);
1334 assert(e == GF_OK);
1335 }
1336 }
1337
1338 //or under a <span>
1339 p_idx = 0;
1340 while ( (p_node = (GF_XMLNode*)gf_list_enum(div_child->content, &p_idx))) {
1341 e = gf_xml_get_element_check_namespace(p_node, "span", root->ns);
1342 if (e == GF_BAD_PARAM) {
1343 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] ignored \"%s\" node, check your namespaces\n", p_node->name));
1344 } else if (e == GF_OK) {
1345 u32 span_idx = 0;
1346 GF_XMLAttribute *span_att;
1347 while ( (span_att = (GF_XMLAttribute*)gf_list_enum(p_node->attributes, &span_idx))) {
1348
1349 if (!strcmp(span_att->name, "begin")) {
1350 if (ts_begin != -1) {
1351 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML EBU-TTD] duplicated \"begin\" attribute under <span>. Abort.\n"));
1352 e = GF_NON_COMPLIANT_BITSTREAM;
1353 goto exit;
1354 }
1355 if (sscanf(span_att->value, "%u:%u:%u.%u", &h, &m, &s, &ms) == 4) {
1356 ts_begin = (h*3600 + m*60+s)*1000+ms;
1357 } else if (sscanf(span_att->value, "%u:%u:%u", &h, &m, &s) == 3) {
1358 ts_begin = (h*3600 + m*60+s)*1000;
1359 }
1360 } else if (!strcmp(span_att->name, "end")) {
1361 if (ts_end != -1) {
1362 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML EBU-TTD] duplicated \"end\" attribute under <span>. Abort.\n"));
1363 e = GF_NON_COMPLIANT_BITSTREAM;
1364 goto exit;
1365 }
1366 if (sscanf(span_att->value, "%u:%u:%u.%u", &h, &m, &s, &ms) == 4) {
1367 ts_end = (h*3600 + m*60+s)*1000+ms;
1368 } else if (sscanf(span_att->value, "%u:%u:%u", &h, &m, &s) == 3) {
1369 ts_end = (h*3600 + m*60+s)*1000;
1370 }
1371 }
1372 if ((ts_begin != -1) && (ts_end != -1) && !samp_text && ctx->sample_list_node) {
1373 /*append the sample*/
1374 e = gf_xml_dom_append_child(ctx->sample_list_node, div_child);
1375 assert(e == GF_OK);
1376 samp_text = gf_xml_dom_serialize((GF_XMLNode*)ctx->root_working_copy, GF_FALSE);
1377 e = gf_xml_dom_rem_child(ctx->sample_list_node, div_child);
1378 assert(e == GF_OK);
1379 }
1380 }
1381 }
1382 }
1383
1384 if ((ts_begin != -1) && (ts_end != -1) && samp_text) {
1385 GF_FilterPacket *pck;
1386 u8 *pck_data;
1387 Bool skip_pck = GF_FALSE;
1388 u32 txt_len;
1389 char *txt_str;
1390
1391 if (ts_end < ts_begin) {
1392 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML EBU-TTD] invalid timings: \"begin\"="LLD" , \"end\"="LLD". Abort.\n", ts_begin, ts_end));
1393 e = GF_NON_COMPLIANT_BITSTREAM;
1394 goto exit;
1395 }
1396
1397 if (ts_begin < (s64) ctx->end) {
1398 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TTML EBU-TTD] timing overlapping not supported: \"begin\" is "LLD" , last \"end\" was "LLD". Abort.\n", ts_begin, ctx->end));
1399 e = GF_NOT_SUPPORTED;
1400 goto exit;
1401 }
1402
1403 txt_str = ttxt_parse_string(samp_text, GF_TRUE);
1404 if (!txt_str) txt_str = "";
1405 txt_len = (u32) strlen(txt_str);
1406
1407 if (ctx->first_samp) {
1408 ts_begin = 0; /*in MP4 we must start at T=0*/
1409 ctx->last_sample_duration = ts_end;
1410 ctx->first_samp = GF_FALSE;
1411 } else {
1412 ctx->last_sample_duration = ts_end - ts_begin;
1413 }
1414
1415 ctx->end = ts_end;
1416 GF_LOG(GF_LOG_DEBUG, GF_LOG_PARSER, ("ts_begin="LLD", ts_end="LLD", last_sample_duration="LLU" (real duration: "LLU"), last_sample_end="LLU"\n", ts_begin, ts_end, ts_end - ctx->end, ctx->last_sample_duration, ctx->end));
1417
1418 if (ctx->seek_state==2) {
1419 Double end = (Double) ts_end;
1420 end /= ctx->timescale;
1421 if (end<ctx->start_range) skip_pck = GF_TRUE;
1422 else ctx->seek_state = 0;
1423 }
1424
1425 if (!skip_pck) {
1426 pck = gf_filter_pck_new_alloc(ctx->opid, txt_len, &pck_data);
1427 memcpy(pck_data, txt_str, txt_len);
1428 gf_filter_pck_set_sap(pck, GF_FILTER_SAP_1);
1429 gf_filter_pck_set_cts(pck, (ctx->timescale*ts_begin)/1000);
1430 gf_filter_pck_send(pck);
1431 }
1432
1433 gf_free(samp_text);
1434 samp_text = NULL;
1435 ctx->nb_p_found++;
1436 } else {
1437 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] incomplete sample (begin="LLD", end="LLD", text=\"%s\"). Skip.\n", ts_begin, ts_end, samp_text ? samp_text : "NULL"));
1438 }
1439
1440 if (gf_filter_pid_would_block(ctx->opid)) {
1441 ctx->cur_child_idx++;
1442 return GF_OK;
1443 }
1444 }
1445
1446 if (!ctx->nb_p_found) {
1447 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TTML EBU-TTD] \"%s\" div node has no <p> elements.\n", ctx->div_node->name));
1448 }
1449
1450 GF_LOG(GF_LOG_DEBUG, GF_LOG_PARSER, ("[TTML EBU-TTD] last_sample_duration="LLU", last_sample_end="LLU"\n", ctx->last_sample_duration, ctx->end));
1451
1452 gf_filter_pid_set_info_str( ctx->opid, "ttxt:last_dur", &PROP_UINT((u32) ctx->last_sample_duration) );
1453
1454 return GF_EOS;
1455
1456
1457 exit:
1458 ctx->non_compliant_ttml = GF_TRUE;
1459 return e;
1460 }
1461
1462 #ifndef GPAC_DISABLE_SWF_IMPORT
1463
1464 static GF_Err swf_svg_add_iso_sample(void *user, const u8 *data, u32 length, u64 timestamp, Bool isRap)
1465 {
1466 GF_FilterPacket *pck;
1467 u8 *pck_data;
1468 GF_TXTIn *ctx = (GF_TXTIn *)user;
1469
1470 if (ctx->seek_state==2) {
1471 Double ts = (Double) timestamp;
1472 ts/=1000;
1473 if (ts<ctx->start_range) return GF_OK;
1474 ctx->seek_state = 0;
1475 }
1476
1477 pck = gf_filter_pck_new_alloc(ctx->opid, length, &pck_data);
1478 memcpy(pck_data, data, length);
1479 gf_filter_pck_set_cts(pck, (u64) (ctx->timescale*timestamp/1000) );
1480 gf_filter_pck_set_sap(pck, isRap ? GF_FILTER_SAP_1 : GF_FILTER_SAP_NONE);
1481 gf_filter_pck_set_framing(pck, GF_TRUE, GF_FALSE);
1482
1483 gf_filter_pck_send(pck);
1484
1485 if (gf_filter_pid_would_block(ctx->opid))
1486 ctx->do_suspend = GF_TRUE;
1487 return GF_OK;
1488 }
1489
1490 static GF_Err swf_svg_add_iso_header(void *user, const u8 *data, u32 length, Bool isHeader)
1491 {
1492 GF_TXTIn *ctx = (GF_TXTIn *)user;
1493
1494 if (isHeader) {
1495 if (!ctx->hdr_parsed) {
1496 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG, &PROP_DATA((char *)data, (u32) ( strlen(data)+1 ) ) );
1497 ctx->hdr_parsed = GF_TRUE;
1498 }
1499 } else if (!ctx->seek_state) {
1500 GF_FilterPacket *pck;
1501 u8 *pck_data;
1502 pck = gf_filter_pck_new_alloc(ctx->opid, length, &pck_data);
1503 memcpy(pck_data, data, length);
1504 gf_filter_pck_set_framing(pck, GF_FALSE, GF_TRUE);
1505
1506 gf_filter_pck_send(pck);
1507 }
1508 return GF_OK;
1509 }
1510
1511 static GF_Err gf_text_swf_setup(GF_Filter *filter, GF_TXTIn *ctx)
1512 {
1513 GF_Err e;
1514 u32 ID;
1515
1516 ctx->swf_parse = gf_swf_reader_new(NULL, ctx->file_name);
1517 e = gf_swf_read_header(ctx->swf_parse);
1518 if (e) return e;
1519 gf_swf_reader_set_user_mode(ctx->swf_parse, ctx, swf_svg_add_iso_sample, swf_svg_add_iso_header);
1520
1521 if (!ctx->timescale) ctx->timescale = 1000;
1522
1523 if (!ctx->opid) ctx->opid = gf_filter_pid_new(filter);
1524 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STREAM_TYPE, &PROP_UINT(GF_STREAM_TEXT) );
1525 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CODECID, &PROP_UINT(GF_CODECID_SIMPLE_TEXT) );
1526 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TIMESCALE, &PROP_UINT(ctx->timescale) );
1527 // gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DOWN_SIZE, &PROP_UINT(file_size) );
1528
1529 //patch for old arch
1530 ctx->width = FIX2INT(ctx->swf_parse->width);
1531 ctx->height = FIX2INT(ctx->swf_parse->height);
1532 if (!ctx->width && !ctx->height) {
1533 ctx->width = 400;
1534 ctx->height = 60;
1535 }
1536 ID = 1;
1537 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ID, &PROP_UINT(ID) );
1538 if (ctx->width) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_WIDTH, &PROP_UINT(ctx->width) );
1539 if (ctx->height) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_HEIGHT, &PROP_UINT(ctx->height) );
1540 if (ctx->zorder) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ZORDER, &PROP_SINT(ctx->zorder) );
1541 if (ctx->lang) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_LANGUAGE, &PROP_STRING( ctx->lang) );
1542
1543 gf_filter_pid_set_property_str(ctx->opid, "meta:mime", &PROP_STRING("image/svg+xml") );
1544
1545 #ifndef GPAC_DISABLE_SVG
1546 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] swf -> svg not fully migrated, using SWF flags 0 and no flatten angle. Patch welcome\n"));
1547 e = swf_to_svg_init(ctx->swf_parse, 0, 0);
1548 #endif
1549
1550 //SWF->BIFS is handled in ctx loader, no need to define it here
1551 txtin_probe_duration(ctx);
1552
1553 return e;
1554 }
1555
1556 static GF_Err gf_text_process_swf(GF_Filter *filter, GF_TXTIn *ctx)
1557 {
1558 GF_Err e=GF_OK;
1559
1560 if (!ctx->is_setup) {
1561 ctx->is_setup = GF_TRUE;
1562 return gf_text_swf_setup(filter, ctx);
1563 }
1564 if (!ctx->opid) return GF_NOT_SUPPORTED;
1565
1566 if (ctx->seek_state==1) {
1567 ctx->seek_state = 2;
1568 gf_swf_reader_del(ctx->swf_parse);
1569 ctx->swf_parse = gf_swf_reader_new(NULL, ctx->file_name);
1570 gf_swf_read_header(ctx->swf_parse);
1571 gf_swf_reader_set_user_mode(ctx->swf_parse, ctx, swf_svg_add_iso_sample, swf_svg_add_iso_header);
1572 }
1573
1574 ctx->do_suspend = GF_FALSE;
1575 /*parse all tags*/
1576 while (e == GF_OK) {
1577 e = swf_parse_tag(ctx->swf_parse);
1578 if (ctx->do_suspend) return GF_OK;
1579 }
1580 if (e==GF_EOS) {
1581 if (ctx->swf_parse->finalize) {
1582 ctx->swf_parse->finalize(ctx->swf_parse);
1583 ctx->swf_parse->finalize = NULL;
1584 }
1585 }
1586 return e;
1587 }
1588 /* end of SWF Importer */
1589
1590 #else
1591
1592 #ifndef GPAC_DISABLE_ZLIB
1593 static GF_Err gf_text_process_swf(GF_Filter *filter, GF_TXTIn *ctx)
1594 {
1595 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("Warning: GPAC was compiled without SWF import support, can't import file.\n"));
1596 return GF_NOT_SUPPORTED;
1597 }
1598 #endif
1599
1600
1601 #endif /*GPAC_DISABLE_SWF_IMPORT*/
1602
1603 static GF_Err gf_text_process_sub(GF_Filter *filter, GF_TXTIn *ctx)
1604 {
1605 u32 i, j, len, line;
1606 GF_TextSample *samp;
1607 Double ts_scale;
1608 char szLine[2048], szTime[20], szText[2048];
1609
1610 //same setup as for srt
1611 if (!ctx->is_setup) {
1612 ctx->is_setup = GF_TRUE;
1613 return txtin_setup_srt(filter, ctx);
1614 }
1615 if (!ctx->opid) return GF_NOT_SUPPORTED;
1616 if (!ctx->playstate) return GF_OK;
1617 else if (ctx->playstate==2) return GF_EOS;
1618
1619 if (ctx->seek_state==1) {
1620 ctx->seek_state = 2;
1621 gf_fseek(ctx->src, 0, SEEK_SET);
1622 }
1623
1624 if (ctx->fps.den && ctx->fps.num) {
1625 ts_scale = ((Double) ctx->fps.num) / ctx->fps.den;
1626 } else {
1627 ts_scale = 25;
1628 }
1629
1630 line = 0;
1631
1632 while (1) {
1633 char *sOK = gf_text_get_utf8_line(szLine, 2048, ctx->src, ctx->unicode_type);
1634 if (!sOK) break;
1635
1636 REM_TRAIL_MARKS(szLine, "\r\n\t ")
1637
1638 line++;
1639 len = (u32) strlen(szLine);
1640 if (!len) continue;
1641
1642 i=0;
1643 if (szLine[i] != '{') {
1644 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Bad SUB file (line %d): expecting \"{\" got \"%c\"\n", line, szLine[i]));
1645 continue;
1646 }
1647 while (szLine[i+1] && szLine[i+1]!='}') {
1648 szTime[i] = szLine[i+1];
1649 i++;
1650 }
1651 szTime[i] = 0;
1652 ctx->start = atoi(szTime);
1653 if (ctx->start < ctx->end) {
1654 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] corrupted SUB frame (line %d) - starts (at %d ms) before end of previous one (%d ms) - adjusting time stamps\n", line, ctx->start, ctx->end));
1655 ctx->start = ctx->end;
1656 }
1657 j=i+2;
1658 i=0;
1659 if (szLine[i+j] != '{') {
1660 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Bad SUB file - expecting \"{\" got \"%c\"\n", szLine[i]));
1661 continue;
1662 }
1663 while (szLine[i+1+j] && szLine[i+1+j]!='}') {
1664 szTime[i] = szLine[i+1+j];
1665 i++;
1666 }
1667 szTime[i] = 0;
1668 ctx->end = atoi(szTime);
1669 j+=i+2;
1670
1671 if (ctx->start > ctx->end) {
1672 GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] corrupted SUB frame (line %d) - ends (at %d ms) before start of current frame (%d ms) - skipping\n", line, ctx->end, ctx->start));
1673 continue;
1674 }
1675
1676 if (ctx->start && ctx->first_samp) {
1677 samp = gf_isom_new_text_sample();
1678 txtin_process_send_text_sample(ctx, samp, 0, (u32) (ts_scale*ctx->start), GF_TRUE);
1679 ctx->first_samp = GF_FALSE;
1680 gf_isom_delete_text_sample(samp);
1681 }
1682
1683 for (i=j; i<len; i++) {
1684 if (szLine[i]=='|') {
1685 szText[i-j] = '\n';
1686 } else {
1687 szText[i-j] = szLine[i];
1688 }
1689 }
1690 szText[i-j] = 0;
1691
1692 if (ctx->prev_end) {
1693 samp = gf_isom_new_text_sample();
1694 txtin_process_send_text_sample(ctx, samp, (u64) (ts_scale*(s64)ctx->prev_end), (u32) (ts_scale*(ctx->prev_end - ctx->start)), GF_TRUE);
1695 gf_isom_delete_text_sample(samp);
1696 }
1697
1698 samp = gf_isom_new_text_sample();
1699 gf_isom_text_add_text(samp, szText, (u32) strlen(szText) );
1700 txtin_process_send_text_sample(ctx, samp, (u64) (ts_scale*(s64)ctx->start), (u32) (ts_scale*(ctx->end - ctx->start)), GF_TRUE);
1701 gf_isom_delete_text_sample(samp);
1702
1703 ctx->prev_end = ctx->end;
1704
1705 gf_filter_pid_set_info(ctx->opid, GF_PROP_PID_DOWN_BYTES, &PROP_LONGUINT( gf_ftell(ctx->src )) );
1706
1707 if (gf_filter_pid_would_block(ctx->opid))
1708 return GF_OK;
1709 }
1710 /*final flush*/
1711 if (ctx->end && !ctx->noflush) {
1712 samp = gf_isom_new_text_sample();
1713 txtin_process_send_text_sample(ctx, samp, (u64) (ts_scale*(s64)ctx->end), 0, GF_TRUE);
1714 gf_isom_delete_text_sample(samp);
1715 }
1716
1717 gf_filter_pid_set_info_str( ctx->opid, "ttxt:last_dur", &PROP_UINT(0) );
1718
1719 return GF_EOS;
1720 }
1721
1722
1723 #define CHECK_STR(__str) \
1724 if (!__str) { \
1725 e = gf_import_message(import, GF_BAD_PARAM, "Invalid XML formatting (line %d)", parser.line); \
1726 goto exit; \
1727 } \
1728
1729
/*
 * Parses a color given as four space-separated hexadecimal values, read in the order
 * red, green, blue, alpha, and packs the low 8 bits of each into 0xAARRGGBB.
 * A warning is logged when fewer than four values can be read; values that were not read count as 0.
 */
static u32 ttxt_get_color(char *val)
{
	u32 comp[4] = {0, 0, 0, 0}; /*r, g, b, a*/
	int nb_read;

	nb_read = sscanf(val, "%x %x %x %x", &comp[0], &comp[1], &comp[2], &comp[3]);
	if (nb_read != 4) {
		GF_LOG(GF_LOG_WARNING, GF_LOG_PARSER, ("[TXTIn] Warning: color badly formatted %s\n", val));
	}

	return ((comp[3] & 0xFF) << 24)
	     | ((comp[0] & 0xFF) << 16)
	     | ((comp[1] & 0xFF) << 8)
	     |  (comp[2] & 0xFF);
}
1746
1747 static void ttxt_parse_text_box(GF_XMLNode *n, GF_BoxRecord *box)
1748 {
1749 u32 i=0;
1750 GF_XMLAttribute *att;
1751 memset(box, 0, sizeof(GF_BoxRecord));
1752 while ( (att=(GF_XMLAttribute *)gf_list_enum(n->attributes, &i))) {
1753 if (!stricmp(att->name, "top")) box->top = atoi(att->value);
1754 else if (!stricmp(att->name, "bottom")) box->bottom = atoi(att->value);
1755 else if (!stricmp(att->name, "left")) box->left = atoi(att->value);
1756 else if (!stricmp(att->name, "right")) box->right = atoi(att->value);
1757 }
1758 }
1759
1760 static void ttxt_parse_text_style(GF_TXTIn *ctx, GF_XMLNode *n, GF_StyleRecord *style)
1761 {
1762 u32 i=0;
1763 GF_XMLAttribute *att;
1764 memset(style, 0, sizeof(GF_StyleRecord));
1765 style->fontID = 1;
1766 style->font_size = ctx->fontsize ;
1767 style->text_color = 0xFFFFFFFF;
1768
1769 while ( (att=(GF_XMLAttribute *)gf_list_enum(n->attributes, &i))) {
1770 if (!stricmp(att->name, "fromChar")) style->startCharOffset = atoi(att->value);
1771 else if (!stricmp(att->name, "toChar")) style->endCharOffset = atoi(att->value);
1772 else if (!stricmp(att->name, "fontID")) style->fontID = atoi(att->value);
1773 else if (!stricmp(att->name, "fontSize")) style->font_size = atoi(att->value);
1774 else if (!stricmp(att->name, "color")) style->text_color = ttxt_get_color(att->value);
1775 else if (!stricmp(att->name, "styles")) {
1776 if (strstr(att->value, "Bold")) style->style_flags |= GF_TXT_STYLE_BOLD;
1777 if (strstr(att->value, "Italic")) style->style_flags |= GF_TXT_STYLE_ITALIC;
1778 if (strstr(att->value, "Underlined")) style->style_flags |= GF_TXT_STYLE_UNDERLINED;
1779 }
1780 }
1781 }
1782
1783 static GF_Err txtin_setup_ttxt(GF_Filter *filter, GF_TXTIn *ctx)
1784 {
1785 GF_Err e;
1786 u32 j, k, ID, OCR_ES_ID;
1787 u64 file_size;
1788 GF_XMLNode *root, *ext;
1789 GF_PropertyValue *dcd;
1790
1791 ctx->parser = gf_xml_dom_new();
1792 e = gf_xml_dom_parse(ctx->parser, ctx->file_name, ttxt_dom_progress, ctx);
1793 if (e) {
1794 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Error parsing TTXT file: Line %d - %s\n", gf_xml_dom_get_line(ctx->parser), gf_xml_dom_get_error(ctx->parser)));
1795 return e;
1796 }
1797 root = gf_xml_dom_get_root(ctx->parser);
1798
1799 if (strcmp(root->name, "TextStream")) {
1800 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Invalid Timed Text file - expecting \"TextStream\" got %s", root->name));
1801 return GF_NON_COMPLIANT_BITSTREAM;
1802 }
1803 file_size = ctx->end;
1804 ctx->end = 0;
1805
1806 /*setup track in 3GP format directly (no ES desc)*/
1807 if (!ctx->timescale) ctx->timescale = 1000;
1808 OCR_ES_ID = ID = 0;
1809
1810 if (!ctx->opid) ctx->opid = gf_filter_pid_new(filter);
1811 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STREAM_TYPE, &PROP_UINT(GF_STREAM_TEXT) );
1812 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CODECID, &PROP_UINT(GF_CODECID_TX3G) );
1813 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TIMESCALE, &PROP_UINT(ctx->timescale) );
1814 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DOWN_SIZE, &PROP_LONGUINT(file_size) );
1815
1816 if (!ID) ID = 1;
1817 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ID, &PROP_UINT(ID) );
1818 if (OCR_ES_ID) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CLOCK_ID, &PROP_UINT(OCR_ES_ID) );
1819
1820 ctx->nb_children = gf_list_count(root->content);
1821
1822 ctx->cur_child_idx = 0;
1823 for (ctx->cur_child_idx=0; ctx->cur_child_idx < ctx->nb_children; ctx->cur_child_idx++) {
1824 GF_XMLNode *node = (GF_XMLNode*) gf_list_get(root->content, ctx->cur_child_idx);
1825
1826 if (node->type) {
1827 continue;
1828 }
1829
1830 if (!strcmp(node->name, "TextStreamHeader")) {
1831 GF_XMLNode *sdesc;
1832 s32 w, h, tx, ty, layer;
1833 u32 tref_id;
1834 GF_XMLAttribute *att;
1835 w = ctx->width;
1836 h = ctx->height;
1837 tx = ctx->txtx;
1838 ty = ctx->txty;
1839 layer = ctx->zorder;
1840 tref_id = 0;
1841
1842 j=0;
1843 while ( (att=(GF_XMLAttribute *)gf_list_enum(node->attributes, &j))) {
1844 if (!strcmp(att->name, "width")) w = atoi(att->value);
1845 else if (!strcmp(att->name, "height")) h = atoi(att->value);
1846 else if (!strcmp(att->name, "layer")) layer = atoi(att->value);
1847 else if (!strcmp(att->name, "translation_x")) tx = atoi(att->value);
1848 else if (!strcmp(att->name, "translation_y")) ty = atoi(att->value);
1849 else if (!strcmp(att->name, "trefID")) tref_id = atoi(att->value);
1850 }
1851
1852 if (tref_id) {
1853 gf_filter_pid_set_property_str(ctx->opid, "tref:chap", &PROP_UINT(tref_id) );
1854 }
1855
1856 if (w) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_WIDTH, &PROP_UINT(w) );
1857 if (h) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_HEIGHT, &PROP_UINT(h) );
1858 if (tx) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TRANS_X, &PROP_UINT(tx) );
1859 if (ty) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TRANS_X, &PROP_UINT(ty) );
1860 if (layer) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ZORDER, &PROP_SINT(ctx->zorder) );
1861 if (ctx->lang) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_LANGUAGE, &PROP_STRING( ctx->lang) );
1862
1863 j=0;
1864 while ( (sdesc=(GF_XMLNode*)gf_list_enum(node->content, &j))) {
1865 if (sdesc->type) continue;
1866
1867 if (!strcmp(sdesc->name, "TextSampleDescription")) {
1868 GF_TextSampleDescriptor td;
1869 memset(&td, 0, sizeof(GF_TextSampleDescriptor));
1870 td.tag = GF_ODF_TEXT_CFG_TAG;
1871 td.vert_justif = (s8) -1;
1872 td.default_style.fontID = 1;
1873 td.default_style.font_size = ctx->fontsize;
1874
1875 k=0;
1876 while ( (att=(GF_XMLAttribute *)gf_list_enum(sdesc->attributes, &k))) {
1877 if (!strcmp(att->name, "horizontalJustification")) {
1878 if (!stricmp(att->value, "center")) td.horiz_justif = 1;
1879 else if (!stricmp(att->value, "right")) td.horiz_justif = (s8) -1;
1880 else if (!stricmp(att->value, "left")) td.horiz_justif = 0;
1881 }
1882 else if (!strcmp(att->name, "verticalJustification")) {
1883 if (!stricmp(att->value, "center")) td.vert_justif = 1;
1884 else if (!stricmp(att->value, "bottom")) td.vert_justif = (s8) -1;
1885 else if (!stricmp(att->value, "top")) td.vert_justif = 0;
1886 }
1887 else if (!strcmp(att->name, "backColor")) td.back_color = ttxt_get_color(att->value);
1888 else if (!strcmp(att->name, "verticalText") && !stricmp(att->value, "yes") ) td.displayFlags |= GF_TXT_VERTICAL;
1889 else if (!strcmp(att->name, "fillTextRegion") && !stricmp(att->value, "yes") ) td.displayFlags |= GF_TXT_FILL_REGION;
1890 else if (!strcmp(att->name, "continuousKaraoke") && !stricmp(att->value, "yes") ) td.displayFlags |= GF_TXT_KARAOKE;
1891 else if (!strcmp(att->name, "scroll")) {
1892 if (!stricmp(att->value, "inout")) td.displayFlags |= GF_TXT_SCROLL_IN | GF_TXT_SCROLL_OUT;
1893 else if (!stricmp(att->value, "in")) td.displayFlags |= GF_TXT_SCROLL_IN;
1894 else if (!stricmp(att->value, "out")) td.displayFlags |= GF_TXT_SCROLL_OUT;
1895 }
1896 else if (!strcmp(att->name, "scrollMode")) {
1897 u32 scroll_mode = GF_TXT_SCROLL_CREDITS;
1898 if (!stricmp(att->value, "Credits")) scroll_mode = GF_TXT_SCROLL_CREDITS;
1899 else if (!stricmp(att->value, "Marquee")) scroll_mode = GF_TXT_SCROLL_MARQUEE;
1900 else if (!stricmp(att->value, "Right")) scroll_mode = GF_TXT_SCROLL_RIGHT;
1901 else if (!stricmp(att->value, "Down")) scroll_mode = GF_TXT_SCROLL_DOWN;
1902 td.displayFlags |= ((scroll_mode<<7) & GF_TXT_SCROLL_DIRECTION);
1903 }
1904 }
1905
1906 k=0;
1907 while ( (ext=(GF_XMLNode*)gf_list_enum(sdesc->content, &k))) {
1908 if (ext->type) continue;
1909 if (!strcmp(ext->name, "TextBox")) ttxt_parse_text_box(ext, &td.default_pos);
1910 else if (!strcmp(ext->name, "Style")) ttxt_parse_text_style(ctx, ext, &td.default_style);
1911 else if (!strcmp(ext->name, "FontTable")) {
1912 GF_XMLNode *ftable;
1913 u32 z=0;
1914 while ( (ftable=(GF_XMLNode*)gf_list_enum(ext->content, &z))) {
1915 u32 m;
1916 if (ftable->type || strcmp(ftable->name, "FontTableEntry")) continue;
1917 td.font_count += 1;
1918 td.fonts = (GF_FontRecord*)gf_realloc(td.fonts, sizeof(GF_FontRecord)*td.font_count);
1919 m=0;
1920 while ( (att=(GF_XMLAttribute *)gf_list_enum(ftable->attributes, &m))) {
1921 if (!stricmp(att->name, "fontID")) td.fonts[td.font_count-1].fontID = atoi(att->value);
1922 else if (!stricmp(att->name, "fontName")) td.fonts[td.font_count-1].fontName = gf_strdup(att->value);
1923 }
1924 }
1925 }
1926 }
1927 if (ctx->nodefbox) {
1928 td.default_pos.top = td.default_pos.left = td.default_pos.right = td.default_pos.bottom = 0;
1929 } else {
1930 if ((td.default_pos.bottom==td.default_pos.top) || (td.default_pos.right==td.default_pos.left)) {
1931 td.default_pos.top = td.default_pos.left = 0;
1932 td.default_pos.right = w;
1933 td.default_pos.bottom = h;
1934 }
1935 }
1936 if (!td.fonts) {
1937 td.font_count = 1;
1938 td.fonts = (GF_FontRecord*)gf_malloc(sizeof(GF_FontRecord));
1939 td.fonts[0].fontID = 1;
1940 td.fonts[0].fontName = gf_strdup("Serif");
1941 }
1942 GF_SAFEALLOC(dcd, GF_PropertyValue);
1943 if (dcd) {
1944 dcd->type = GF_PROP_DATA;
1945
1946 gf_odf_tx3g_write(&td, &dcd->value.data.ptr, &dcd->value.data.size);
1947 if (!ctx->text_descs) ctx->text_descs = gf_list_new();
1948 gf_list_add(ctx->text_descs, dcd);
1949 }
1950
1951 for (k=0; k<td.font_count; k++) gf_free(td.fonts[k].fontName);
1952 gf_free(td.fonts);
1953 }
1954 }
1955 }
1956 else {
1957 break;
1958 }
1959 }
1960
1961 if (!ctx->text_descs) {
1962 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Invalid Timed Text file - text stream header not found or empty\n"));
1963 return GF_NON_COMPLIANT_BITSTREAM;
1964 }
1965 dcd = gf_list_get(ctx->text_descs, 0);
1966 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG, dcd);
1967 ctx->last_desc_idx = 1;
1968
1969 ctx->first_samp = GF_TRUE;
1970 ctx->last_sample_empty = GF_FALSE;
1971 ctx->last_sample_duration = 0;
1972
1973 txtin_probe_duration(ctx);
1974
1975 return GF_OK;
1976 }
1977
1978 static GF_Err txtin_process_ttxt(GF_Filter *filter, GF_TXTIn *ctx)
1979 {
1980 u32 j, k;
1981 GF_XMLNode *root, *ext;
1982
1983 if (!ctx->is_setup) {
1984 ctx->is_setup = GF_TRUE;
1985 return txtin_setup_ttxt(filter, ctx);
1986 }
1987 if (!ctx->opid) return GF_NON_COMPLIANT_BITSTREAM;
1988 if (!ctx->playstate) return GF_OK;
1989 else if (ctx->playstate==2) return GF_EOS;
1990
1991 if (ctx->seek_state==1) {
1992 ctx->seek_state = 2;
1993 ctx->cur_child_idx = 0;
1994 }
1995 root = gf_xml_dom_get_root(ctx->parser);
1996
1997 for (; ctx->cur_child_idx < ctx->nb_children; ctx->cur_child_idx++) {
1998 GF_TextSample * samp;
1999 u32 ts, descIndex;
2000 Bool has_text = GF_FALSE;
2001 GF_XMLAttribute *att;
2002 GF_XMLNode *node = (GF_XMLNode*) gf_list_get(root->content, ctx->cur_child_idx);
2003
2004 if (node->type) {
2005 continue;
2006 }
2007 /*sample text*/
2008 else if (strcmp(node->name, "TextSample")) continue;
2009
2010 samp = gf_isom_new_text_sample();
2011 ts = 0;
2012 descIndex = 1;
2013 ctx->last_sample_empty = GF_TRUE;
2014
2015 j=0;
2016 while ( (att=(GF_XMLAttribute*)gf_list_enum(node->attributes, &j))) {
2017 if (!strcmp(att->name, "sampleTime")) {
2018 u32 h, m, s, ms;
2019 if (sscanf(att->value, "%u:%u:%u.%u", &h, &m, &s, &ms) == 4) {
2020 ts = (h*3600 + m*60 + s)*1000 + ms;
2021 } else {
2022 ts = (u32) (atof(att->value) * 1000);
2023 }
2024 }
2025 else if (!strcmp(att->name, "sampleDescriptionIndex")) descIndex = atoi(att->value);
2026 else if (!strcmp(att->name, "text")) {
2027 u32 len;
2028 char *str = ttxt_parse_string(att->value, GF_TRUE);
2029 len = (u32) strlen(str);
2030 gf_isom_text_add_text(samp, str, len);
2031 ctx->last_sample_empty = len ? GF_FALSE : GF_TRUE;
2032 has_text = GF_TRUE;
2033 }
2034 else if (!strcmp(att->name, "scrollDelay")) gf_isom_text_set_scroll_delay(samp, (u32) (1000*atoi(att->value)));
2035 else if (!strcmp(att->name, "highlightColor")) gf_isom_text_set_highlight_color(samp, ttxt_get_color(att->value));
2036 else if (!strcmp(att->name, "wrap") && !strcmp(att->value, "Automatic")) gf_isom_text_set_wrap(samp, 0x01);
2037 }
2038
2039 /*get all modifiers*/
2040 j=0;
2041 while ( (ext=(GF_XMLNode*)gf_list_enum(node->content, &j))) {
2042 if (!has_text && (ext->type==GF_XML_TEXT_TYPE)) {
2043 u32 len;
2044 char *str = ttxt_parse_string(ext->name, GF_FALSE);
2045 len = (u32) strlen(str);
2046 gf_isom_text_add_text(samp, str, len);
2047 ctx->last_sample_empty = len ? GF_FALSE : GF_TRUE;
2048 has_text = GF_TRUE;
2049 }
2050 if (ext->type) continue;
2051
2052 if (!stricmp(ext->name, "Style")) {
2053 GF_StyleRecord r;
2054 ttxt_parse_text_style(ctx, ext, &r);
2055 gf_isom_text_add_style(samp, &r);
2056 }
2057 else if (!stricmp(ext->name, "TextBox")) {
2058 GF_BoxRecord r;
2059 ttxt_parse_text_box(ext, &r);
2060 gf_isom_text_set_box(samp, r.top, r.left, r.bottom, r.right);
2061 }
2062 else if (!stricmp(ext->name, "Highlight")) {
2063 u16 start, end;
2064 start = end = 0;
2065 k=0;
2066 while ( (att=(GF_XMLAttribute *)gf_list_enum(ext->attributes, &k))) {
2067 if (!strcmp(att->name, "fromChar")) start = atoi(att->value);
2068 else if (!strcmp(att->name, "toChar")) end = atoi(att->value);
2069 }
2070 gf_isom_text_add_highlight(samp, start, end);
2071 }
2072 else if (!stricmp(ext->name, "Blinking")) {
2073 u16 start, end;
2074 start = end = 0;
2075 k=0;
2076 while ( (att=(GF_XMLAttribute *)gf_list_enum(ext->attributes, &k))) {
2077 if (!strcmp(att->name, "fromChar")) start = atoi(att->value);
2078 else if (!strcmp(att->name, "toChar")) end = atoi(att->value);
2079 }
2080 gf_isom_text_add_blink(samp, start, end);
2081 }
2082 else if (!stricmp(ext->name, "HyperLink")) {
2083 u16 start, end;
2084 char *url, *url_tt;
2085 start = end = 0;
2086 url = url_tt = NULL;
2087 k=0;
2088 while ( (att=(GF_XMLAttribute *)gf_list_enum(ext->attributes, &k))) {
2089 if (!strcmp(att->name, "fromChar")) start = atoi(att->value);
2090 else if (!strcmp(att->name, "toChar")) end = atoi(att->value);
2091 else if (!strcmp(att->name, "URL")) url = gf_strdup(att->value);
2092 else if (!strcmp(att->name, "URLToolTip")) url_tt = gf_strdup(att->value);
2093 }
2094 gf_isom_text_add_hyperlink(samp, url, url_tt, start, end);
2095 if (url) gf_free(url);
2096 if (url_tt) gf_free(url_tt);
2097 }
2098 else if (!stricmp(ext->name, "Karaoke")) {
2099 u32 startTime;
2100 GF_XMLNode *krok;
2101 startTime = 0;
2102 k=0;
2103 while ( (att=(GF_XMLAttribute *)gf_list_enum(ext->attributes, &k))) {
2104 if (!strcmp(att->name, "startTime")) startTime = (u32) (1000*atof(att->value));
2105 }
2106 gf_isom_text_add_karaoke(samp, startTime);
2107 k=0;
2108 while ( (krok=(GF_XMLNode*)gf_list_enum(ext->content, &k))) {
2109 u16 start, end;
2110 u32 endTime, m;
2111 if (krok->type) continue;
2112 if (strcmp(krok->name, "KaraokeRange")) continue;
2113 start = end = 0;
2114 endTime = 0;
2115 m=0;
2116 while ( (att=(GF_XMLAttribute *)gf_list_enum(krok->attributes, &m))) {
2117 if (!strcmp(att->name, "fromChar")) start = atoi(att->value);
2118 else if (!strcmp(att->name, "toChar")) end = atoi(att->value);
2119 else if (!strcmp(att->name, "endTime")) endTime = (u32) (1000*atof(att->value));
2120 }
2121 gf_isom_text_set_karaoke_segment(samp, endTime, start, end);
2122 }
2123 }
2124 }
2125
2126 if (!descIndex) descIndex = 1;
2127 if (descIndex != ctx->last_desc_idx) {
2128 GF_PropertyValue *dcd;
2129 ctx->last_desc_idx = descIndex;
2130 dcd = gf_list_get(ctx->text_descs, descIndex-1);
2131 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG, dcd);
2132 }
2133
2134 /*in MP4 we must start at T=0, so add an empty sample*/
2135 if (ts && ctx->first_samp) {
2136 GF_TextSample * firstsamp = gf_isom_new_text_sample();
2137 txtin_process_send_text_sample(ctx, firstsamp, 0, 0, GF_TRUE);
2138 gf_isom_delete_text_sample(firstsamp);
2139 }
2140 ctx->first_samp = GF_FALSE;
2141
2142 txtin_process_send_text_sample(ctx, samp, ts, 0, GF_TRUE);
2143
2144 gf_isom_delete_text_sample(samp);
2145
2146 if (ctx->last_sample_empty) {
2147 ctx->last_sample_duration = ts - ctx->last_sample_duration;
2148 } else {
2149 ctx->last_sample_duration = ts;
2150 }
2151
2152 if (gf_filter_pid_would_block(ctx->opid)) {
2153 ctx->cur_child_idx++;
2154 return GF_OK;
2155 }
2156 }
2157
2158 if (ctx->last_sample_empty) {
2159 //this is a bit ugly, in regular streaming mode we don't want to remove empty samples
2160 //howvere the last one can be removed, adjusting the duration of the previous one.
2161 //doing this here is problematic if the loader is sent a new ttxt file, we would have a cue termination sample
2162 //we therefore share that info through pid, and let the final user (muxer& co) decide what to do
2163 gf_filter_pid_set_info_str( ctx->opid, "ttxt:rem_last", &PROP_BOOL(GF_TRUE) );
2164 gf_filter_pid_set_info_str( ctx->opid, "ttxt:last_dur", &PROP_UINT((u32) ctx->last_sample_duration) );
2165 }
2166
2167 return GF_EOS;
2168 }
2169
2170
2171 static u32 tx3g_get_color(char *value)
2172 {
2173 u32 r, g, b, a;
2174 u32 res, v;
2175 r = g = b = a = 0;
2176 if (sscanf(value, "%u%%, %u%%, %u%%, %u%%", &r, &g, &b, &a) != 4) {
2177 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("Warning: color badly formatted\n"));
2178 }
2179 v = (u32) (a*255/100);
2180 res = (v&0xFF);
2181 res<<=8;
2182 v = (u32) (r*255/100);
2183 res |= (v&0xFF);
2184 res<<=8;
2185 v = (u32) (g*255/100);
2186 res |= (v&0xFF);
2187 res<<=8;
2188 v = (u32) (b*255/100);
2189 res |= (v&0xFF);
2190 return res;
2191 }
2192
2193 static void tx3g_parse_text_box(GF_XMLNode *n, GF_BoxRecord *box)
2194 {
2195 u32 i=0;
2196 GF_XMLAttribute *att;
2197 memset(box, 0, sizeof(GF_BoxRecord));
2198 while ((att=(GF_XMLAttribute *)gf_list_enum(n->attributes, &i))) {
2199 if (!stricmp(att->name, "x")) box->left = atoi(att->value);
2200 else if (!stricmp(att->name, "y")) box->top = atoi(att->value);
2201 else if (!stricmp(att->name, "height")) box->bottom = atoi(att->value);
2202 else if (!stricmp(att->name, "width")) box->right = atoi(att->value);
2203 }
2204 }
2205
2206 typedef struct
2207 {
2208 u32 id;
2209 u32 pos;
2210 } Marker;
2211
/*
 * Resolves the marker whose id is given by the current attribute value (att->value) and
 * stores its character position in _val; _val is set to 0 when no such marker exists.
 * Expects `att`, `marks` and `nb_marks` to be visible at the expansion site.
 * __isend is currently unused: end markers are not adjusted.
 * The expansion is a plain { } block so it can be followed directly by `else`.
 */
#define GET_MARKER_POS(_val, __isend) \
	{ \
		const u32 __wanted = (u32) atoi(att->value); \
		u32 __mk = 0; \
		_val = 0; \
		while (__mk < nb_marks) { \
			if (marks[__mk].id == __wanted) { \
				_val = marks[__mk].pos; \
				break; \
			} \
			__mk++; \
		} \
	}
2218
2219
2220 static GF_Err txtin_texml_setup(GF_Filter *filter, GF_TXTIn *ctx)
2221 {
2222 GF_Err e;
2223 u32 ID, OCR_ES_ID, i;
2224 u64 file_size;
2225 GF_XMLAttribute *att;
2226 GF_XMLNode *root;
2227
2228 ctx->parser = gf_xml_dom_new();
2229 e = gf_xml_dom_parse(ctx->parser, ctx->file_name, ttxt_dom_progress, ctx);
2230 if (e) {
2231 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Error parsing TeXML file: Line %d - %s", gf_xml_dom_get_line(ctx->parser), gf_xml_dom_get_error(ctx->parser) ));
2232 gf_xml_dom_del(ctx->parser);
2233 ctx->parser = NULL;
2234 return e;
2235 }
2236
2237 root = gf_xml_dom_get_root(ctx->parser);
2238
2239 if (strcmp(root->name, "text3GTrack")) {
2240 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTIn] Invalid QT TeXML file - expecting root \"text3GTrack\" got \"%s\"", root->name));
2241 return GF_NON_COMPLIANT_BITSTREAM;
2242 }
2243 file_size = ctx->end;
2244 ctx->txml_timescale = 600;
2245
2246 i=0;
2247 while ( (att=(GF_XMLAttribute *)gf_list_enum(root->attributes, &i))) {
2248 if (!strcmp(att->name, "trackWidth")) ctx->width = atoi(att->value);
2249 else if (!strcmp(att->name, "trackHeight")) ctx->height = atoi(att->value);
2250 else if (!strcmp(att->name, "layer")) ctx->zorder = atoi(att->value);
2251 else if (!strcmp(att->name, "timeScale")) ctx->txml_timescale = atoi(att->value);
2252 else if (!strcmp(att->name, "transform")) {
2253 Float fx, fy;
2254 sscanf(att->value, "translate(%f,%f)", &fx, &fy);
2255 ctx->txtx = (u32) fx;
2256 ctx->txty = (u32) fy;
2257 }
2258 }
2259
2260 /*setup track in 3GP format directly (no ES desc)*/
2261 OCR_ES_ID = ID = 0;
2262 if (!ctx->timescale) ctx->timescale = 1000;
2263
2264 if (!ctx->opid) ctx->opid = gf_filter_pid_new(filter);
2265 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STREAM_TYPE, &PROP_UINT(GF_STREAM_TEXT) );
2266 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CODECID, &PROP_UINT(GF_ISOM_SUBTYPE_TX3G) );
2267 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TIMESCALE, &PROP_UINT(ctx->timescale) );
2268 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DOWN_SIZE, &PROP_LONGUINT(file_size) );
2269
2270
2271 if (!ID) ID = 1;
2272 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ID, &PROP_UINT(ID) );
2273 if (OCR_ES_ID) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CLOCK_ID, &PROP_UINT(OCR_ES_ID) );
2274 if (ctx->width) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_WIDTH, &PROP_UINT(ctx->width) );
2275 if (ctx->height) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_HEIGHT, &PROP_UINT(ctx->height) );
2276 if (ctx->zorder) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_ZORDER, &PROP_SINT(ctx->zorder) );
2277 if (ctx->lang) gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_LANGUAGE, &PROP_STRING( ctx->lang) );
2278
2279
2280 ctx->nb_children = gf_list_count(root->content);
2281 ctx->cur_child_idx = 0;
2282 txtin_probe_duration(ctx);
2283
2284 return GF_OK;
2285 }
2286
2287 static GF_Err txtin_process_texml(GF_Filter *filter, GF_TXTIn *ctx)
2288 {
2289 u32 j, k;
2290 GF_StyleRecord styles[50];
2291 Marker marks[50];
2292 GF_XMLAttribute *att;
2293 GF_XMLNode *root;
2294 Bool probe_first_desc_only = GF_FALSE;
2295
2296 if (!ctx->is_setup) {
2297 GF_Err e;
2298
2299 ctx->is_setup = GF_TRUE;
2300 e = txtin_texml_setup(filter, ctx);
2301 if (e) return e;
2302 probe_first_desc_only = GF_TRUE;
2303 }
2304 if (!ctx->opid) return GF_NON_COMPLIANT_BITSTREAM;
2305 if (!ctx->playstate && !probe_first_desc_only) return GF_OK;
2306 else if (ctx->playstate==2) return GF_EOS;
2307
2308 if (ctx->seek_state==1) {
2309 ctx->seek_state = 2;
2310 ctx->cur_child_idx = 0;
2311 ctx->start = 0;
2312 }
2313
2314 root = gf_xml_dom_get_root(ctx->parser);
2315
2316 for (; ctx->cur_child_idx < ctx->nb_children; ctx->cur_child_idx++) {
2317 GF_XMLNode *node, *desc;
2318 GF_TextSampleDescriptor td;
2319 GF_TextSample * samp = NULL;
2320 u64 duration;
2321 u32 nb_styles, nb_marks;
2322 Bool isRAP, same_style, same_box;
2323
2324 if (probe_first_desc_only && ctx->text_descs && gf_list_count(ctx->text_descs))
2325 return GF_OK;
2326
2327 memset(&td, 0, sizeof(GF_TextSampleDescriptor));
2328 node = (GF_XMLNode*)gf_list_get(root->content, ctx->cur_child_idx);
2329 if (node->type) continue;
2330 if (strcmp(node->name, "sample")) continue;
2331
2332 isRAP = GF_TRUE;
2333 duration = 1000;
2334 j=0;
2335 while ((att=(GF_XMLAttribute *)gf_list_enum(node->attributes, &j))) {
2336 if (!strcmp(att->name, "duration")) duration = atoi(att->value);
2337 else if (!strcmp(att->name, "keyframe")) isRAP = (!stricmp(att->value, "true") ? GF_TRUE : GF_FALSE);
2338 }
2339 nb_styles = 0;
2340 nb_marks = 0;
2341 same_style = same_box = GF_FALSE;
2342 j=0;
2343 while ((desc=(GF_XMLNode*)gf_list_enum(node->content, &j))) {
2344 if (desc->type) continue;
2345
2346 if (!strcmp(desc->name, "description")) {
2347 u8 *dsi;
2348 u32 dsi_len, stsd_idx;
2349 GF_XMLNode *sub;
2350 memset(&td, 0, sizeof(GF_TextSampleDescriptor));
2351 td.tag = GF_ODF_TEXT_CFG_TAG;
2352 td.vert_justif = (s8) -1;
2353 td.default_style.fontID = 1;
2354 td.default_style.font_size = ctx->fontsize;
2355
2356 k=0;
2357 while ((att=(GF_XMLAttribute *)gf_list_enum(desc->attributes, &k))) {
2358 if (!strcmp(att->name, "horizontalJustification")) {
2359 if (!stricmp(att->value, "center")) td.horiz_justif = 1;
2360 else if (!stricmp(att->value, "right")) td.horiz_justif = (s8) -1;
2361 else if (!stricmp(att->value, "left")) td.horiz_justif = 0;
2362 }
2363 else if (!strcmp(att->name, "verticalJustification")) {
2364 if (!stricmp(att->value, "center")) td.vert_justif = 1;
2365 else if (!stricmp(att->value, "bottom")) td.vert_justif = (s8) -1;
2366 else if (!stricmp(att->value, "top")) td.vert_justif = 0;
2367 }
2368 else if (!strcmp(att->name, "backgroundColor")) td.back_color = tx3g_get_color(att->value);
2369 else if (!strcmp(att->name, "displayFlags")) {
2370 Bool rev_scroll = GF_FALSE;
2371 if (strstr(att->value, "scroll")) {
2372 u32 scroll_mode = 0;
2373 if (strstr(att->value, "scrollIn")) td.displayFlags |= GF_TXT_SCROLL_IN;
2374 if (strstr(att->value, "scrollOut")) td.displayFlags |= GF_TXT_SCROLL_OUT;
2375 if (strstr(att->value, "reverse")) rev_scroll = GF_TRUE;
2376 if (strstr(att->value, "horizontal")) scroll_mode = rev_scroll ? GF_TXT_SCROLL_RIGHT : GF_TXT_SCROLL_MARQUEE;
2377 else scroll_mode = (rev_scroll ? GF_TXT_SCROLL_DOWN : GF_TXT_SCROLL_CREDITS);
2378 td.displayFlags |= (scroll_mode<<7) & GF_TXT_SCROLL_DIRECTION;
2379 }
2380 /*TODO FIXME: check in QT doc !!*/
2381 if (strstr(att->value, "writeTextVertically")) td.displayFlags |= GF_TXT_VERTICAL;
2382 if (!strcmp(att->name, "continuousKaraoke")) td.displayFlags |= GF_TXT_KARAOKE;
2383 }
2384 }
2385
2386 k=0;
2387 while ((sub=(GF_XMLNode*)gf_list_enum(desc->content, &k))) {
2388 if (sub->type) continue;
2389 if (!strcmp(sub->name, "defaultTextBox")) tx3g_parse_text_box(sub, &td.default_pos);
2390 else if (!strcmp(sub->name, "fontTable")) {
2391 GF_XMLNode *ftable;
2392 u32 m=0;
2393 while ((ftable=(GF_XMLNode*)gf_list_enum(sub->content, &m))) {
2394 if (ftable->type) continue;
2395 if (!strcmp(ftable->name, "font")) {
2396 u32 n=0;
2397 td.font_count += 1;
2398 td.fonts = (GF_FontRecord*)gf_realloc(td.fonts, sizeof(GF_FontRecord)*td.font_count);
2399 while ((att=(GF_XMLAttribute *)gf_list_enum(ftable->attributes, &n))) {
2400 if (!stricmp(att->name, "id")) td.fonts[td.font_count-1].fontID = atoi(att->value);
2401 else if (!stricmp(att->name, "name")) td.fonts[td.font_count-1].fontName = gf_strdup(att->value);
2402 }
2403 }
2404 }
2405 }
2406 else if (!strcmp(sub->name, "sharedStyles")) {
2407 GF_XMLNode *style, *ftable;
2408 u32 m=0;
2409 while ((style=(GF_XMLNode*)gf_list_enum(sub->content, &m))) {
2410 if (style->type) continue;
2411 if (!strcmp(style->name, "style")) break;
2412 }
2413 if (style) {
2414 char *cur;
2415 s32 start=0;
2416 char css_style[1024], css_val[1024];
2417 memset(&styles[nb_styles], 0, sizeof(GF_StyleRecord));
2418 m=0;
2419 while ( (att=(GF_XMLAttribute *)gf_list_enum(style->attributes, &m))) {
2420 if (!strcmp(att->name, "id")) styles[nb_styles].startCharOffset = atoi(att->value);
2421 }
2422 m=0;
2423 while ( (ftable=(GF_XMLNode*)gf_list_enum(style->content, &m))) {
2424 if (ftable->type) break;
2425 }
2426 cur = ftable ? ftable->name : NULL;
2427 while (cur) {
2428 start = gf_token_get_strip(cur, 0, "{:", " ", css_style, 1024);
2429 if (start <0) break;
2430 start = gf_token_get_strip(cur, start, ":}", " ", css_val, 1024);
2431 if (start <0) break;
2432 cur = strchr(cur+start, '{');
2433
2434 if (!strcmp(css_style, "font-table")) {
2435 u32 z;
2436 styles[nb_styles].fontID = atoi(css_val);
2437 for (z=0; z<td.font_count; z++) {
2438 if (td.fonts[z].fontID == styles[nb_styles].fontID)
2439 break;
2440 }
2441 }
2442 else if (!strcmp(css_style, "font-size")) styles[nb_styles].font_size = atoi(css_val);
2443 else if (!strcmp(css_style, "font-style") && !strcmp(css_val, "italic")) styles[nb_styles].style_flags |= GF_TXT_STYLE_ITALIC;
2444 else if (!strcmp(css_style, "font-weight") && !strcmp(css_val, "bold")) styles[nb_styles].style_flags |= GF_TXT_STYLE_BOLD;
2445 else if (!strcmp(css_style, "text-decoration") && !strcmp(css_val, "underline")) styles[nb_styles].style_flags |= GF_TXT_STYLE_UNDERLINED;
2446 else if (!strcmp(css_style, "color")) styles[nb_styles].text_color = tx3g_get_color(css_val);
2447 }
2448 if (!nb_styles) td.default_style = styles[0];
2449 nb_styles++;
2450 }
2451 }
2452
2453 }
2454 if ((td.default_pos.bottom==td.default_pos.top) || (td.default_pos.right==td.default_pos.left)) {
2455 td.default_pos.top = ctx->txty;
2456 td.default_pos.left = ctx->txtx;
2457 td.default_pos.right = ctx->width;
2458 td.default_pos.bottom = ctx->height;
2459 }
2460 if (!td.fonts) {
2461 td.font_count = 1;
2462 td.fonts = (GF_FontRecord*)gf_malloc(sizeof(GF_FontRecord));
2463 td.fonts[0].fontID = 1;
2464 td.fonts[0].fontName = gf_strdup( ctx->fontname ? ctx->fontname : "Serif");
2465 }
2466
2467 gf_odf_tx3g_write(&td, &dsi, &dsi_len);
2468 stsd_idx = 0;
2469 for (k=0; ctx->text_descs && k<gf_list_count(ctx->text_descs); k++) {
2470 GF_PropertyValue *d = gf_list_get(ctx->text_descs, k);
2471 if (d->value.data.size != dsi_len) continue;
2472 if (! memcmp(d->value.data.ptr, dsi, dsi_len)) {
2473 stsd_idx = k+1;
2474 break;
2475 }
2476 }
2477 if (stsd_idx) {
2478 gf_free(dsi);
2479 } else {
2480 GF_PropertyValue *d;
2481 GF_SAFEALLOC(d, GF_PropertyValue);
2482 if (!d) return GF_OUT_OF_MEM;
2483 d->type = GF_PROP_DATA;
2484 d->value.data.ptr = dsi;
2485 d->value.data.size = dsi_len;
2486 if (!ctx->text_descs) ctx->text_descs = gf_list_new();
2487 gf_list_add(ctx->text_descs, d);
2488 stsd_idx = gf_list_count(ctx->text_descs);
2489 }
2490 if (stsd_idx != ctx->last_desc_idx) {
2491 ctx->last_desc_idx = stsd_idx;
2492 GF_PropertyValue *d = gf_list_get(ctx->text_descs, stsd_idx-1);
2493 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG, d);
2494 }
2495
2496 for (k=0; k<td.font_count; k++) gf_free(td.fonts[k].fontName);
2497 gf_free(td.fonts);
2498
2499 if (probe_first_desc_only)
2500 return GF_OK;
2501 }
2502 else if (!strcmp(desc->name, "sampleData")) {
2503 GF_XMLNode *sub;
2504 u16 start, end;
2505 u32 styleID;
2506 u32 nb_chars, txt_len, m;
2507 nb_chars = 0;
2508
2509 samp = gf_isom_new_text_sample();
2510
2511 k=0;
2512 while ((att=(GF_XMLAttribute *)gf_list_enum(desc->attributes, &k))) {
2513 if (!strcmp(att->name, "targetEncoding") && !strcmp(att->value, "utf16")) ;//is_utf16 = 1;
2514 else if (!strcmp(att->name, "scrollDelay")) gf_isom_text_set_scroll_delay(samp, atoi(att->value) );
2515 else if (!strcmp(att->name, "highlightColor")) gf_isom_text_set_highlight_color(samp, tx3g_get_color(att->value));
2516 }
2517 start = end = 0;
2518 k=0;
2519 while ((sub=(GF_XMLNode*)gf_list_enum(desc->content, &k))) {
2520 if (sub->type) continue;
2521 if (!strcmp(sub->name, "text")) {
2522 GF_XMLNode *text;
2523 styleID = 0;
2524 m=0;
2525 while ((att=(GF_XMLAttribute *)gf_list_enum(sub->attributes, &m))) {
2526 if (!strcmp(att->name, "styleID")) styleID = atoi(att->value);
2527 }
2528 txt_len = 0;
2529
2530 m=0;
2531 while ((text=(GF_XMLNode*)gf_list_enum(sub->content, &m))) {
2532 if (!text->type) {
2533 if (!strcmp(text->name, "marker")) {
2534 u32 z;
2535 memset(&marks[nb_marks], 0, sizeof(Marker));
2536 marks[nb_marks].pos = nb_chars+txt_len;
2537
2538 z = 0;
2539 while ( (att=(GF_XMLAttribute *)gf_list_enum(text->attributes, &z))) {
2540 if (!strcmp(att->name, "id")) marks[nb_marks].id = atoi(att->value);
2541 }
2542 nb_marks++;
2543 }
2544 } else if (text->type==GF_XML_TEXT_TYPE) {
2545 txt_len += (u32) strlen(text->name);
2546 gf_isom_text_add_text(samp, text->name, (u32) strlen(text->name));
2547 }
2548 }
2549 if (styleID && (!same_style || (td.default_style.startCharOffset != styleID))) {
2550 GF_StyleRecord st = td.default_style;
2551 for (m=0; m<nb_styles; m++) {
2552 if (styles[m].startCharOffset==styleID) {
2553 st = styles[m];
2554 break;
2555 }
2556 }
2557 st.startCharOffset = nb_chars;
2558 st.endCharOffset = nb_chars + txt_len;
2559 gf_isom_text_add_style(samp, &st);
2560 }
2561 nb_chars += txt_len;
2562 }
2563 else if (!stricmp(sub->name, "highlight")) {
2564 m=0;
2565 while ((att=(GF_XMLAttribute *)gf_list_enum(sub->attributes, &m))) {
2566 if (!strcmp(att->name, "startMarker")) GET_MARKER_POS(start, 0)
2567 else if (!strcmp(att->name, "endMarker")) GET_MARKER_POS(end, 1)
2568 }
2569 gf_isom_text_add_highlight(samp, start, end);
2570 }
2571 else if (!stricmp(sub->name, "blink")) {
2572 m=0;
2573 while ((att=(GF_XMLAttribute *)gf_list_enum(sub->attributes, &m))) {
2574 if (!strcmp(att->name, "startMarker")) GET_MARKER_POS(start, 0)
2575 else if (!strcmp(att->name, "endMarker")) GET_MARKER_POS(end, 1)
2576 }
2577 gf_isom_text_add_blink(samp, start, end);
2578 }
2579 else if (!stricmp(sub->name, "link")) {
2580 char *url, *url_tt;
2581 url = url_tt = NULL;
2582 m=0;
2583 while ((att=(GF_XMLAttribute *)gf_list_enum(sub->attributes, &m))) {
2584 if (!strcmp(att->name, "startMarker")) GET_MARKER_POS(start, 0)
2585 else if (!strcmp(att->name, "endMarker")) GET_MARKER_POS(end, 1)
2586 else if (!strcmp(att->name, "URL") || !strcmp(att->name, "href")) url = gf_strdup(att->value);
2587 else if (!strcmp(att->name, "URLToolTip") || !strcmp(att->name, "altString")) url_tt = gf_strdup(att->value);
2588 }
2589 gf_isom_text_add_hyperlink(samp, url, url_tt, start, end);
2590 if (url) gf_free(url);
2591 if (url_tt) gf_free(url_tt);
2592 }
2593 else if (!stricmp(sub->name, "karaoke")) {
2594 u32 time = 0;
2595 GF_XMLNode *krok;
2596 m=0;
2597 while ((att=(GF_XMLAttribute *)gf_list_enum(sub->attributes, &m))) {
2598 if (!strcmp(att->name, "startTime")) time = atoi(att->value);
2599 }
2600 gf_isom_text_add_karaoke(samp, time);
2601 m=0;
2602 while ((krok=(GF_XMLNode*)gf_list_enum(sub->content, &m))) {
2603 u32 u=0;
2604 if (krok->type) continue;
2605 if (strcmp(krok->name, "run")) continue;
2606 start = end = 0;
2607 while ((att=(GF_XMLAttribute *)gf_list_enum(krok->attributes, &u))) {
2608 if (!strcmp(att->name, "startMarker")) GET_MARKER_POS(start, 0)
2609 else if (!strcmp(att->name, "endMarker")) GET_MARKER_POS(end, 1)
2610 else if (!strcmp(att->name, "duration")) time += atoi(att->value);
2611 }
2612 gf_isom_text_set_karaoke_segment(samp, time, start, end);
2613 }
2614 }
2615 }
2616 }
2617 }
2618 /*OK, let's add the sample*/
2619 if (samp) {
2620 if (!same_box) gf_isom_text_set_box(samp, td.default_pos.top, td.default_pos.left, td.default_pos.bottom, td.default_pos.right);
2621 // if (!same_style) gf_isom_text_add_style(samp, &td.default_style);
2622
2623 txtin_process_send_text_sample(ctx, samp, (ctx->start*ctx->timescale)/ctx->txml_timescale, (u32) (duration*ctx->timescale)/ctx->txml_timescale, isRAP);
2624 ctx->start += duration;
2625 gf_isom_delete_text_sample(samp);
2626
2627 }
2628 if (gf_filter_pid_would_block(ctx->opid)) {
2629 ctx->cur_child_idx++;
2630 return GF_OK;
2631 }
2632 }
2633
2634 return GF_EOS;
2635 }
2636
2637
2638 static GF_Err txtin_process(GF_Filter *filter)
2639 {
2640 GF_TXTIn *ctx = gf_filter_get_udta(filter);
2641 GF_FilterPacket *pck;
2642 GF_Err e;
2643 Bool start, end;
2644 pck = gf_filter_pid_get_packet(ctx->ipid);
2645 if (!pck) {
2646 return GF_OK;
2647 }
2648 gf_filter_pck_get_framing(pck, &start, &end);
2649 if (!end) {
2650 gf_filter_pid_drop_packet(ctx->ipid);
2651 return GF_OK;
2652 }
2653 //file is loaded
2654
2655 e = ctx->text_process(filter, ctx);
2656
2657
2658 if (e==GF_EOS) {
2659 //keep input alive until end of stream, so that we keep getting called
2660 gf_filter_pid_drop_packet(ctx->ipid);
2661 if (gf_filter_pid_is_eos(ctx->ipid))
2662 gf_filter_pid_set_eos(ctx->opid);
2663 }
2664 return e;
2665 }
2666
2667 static void ttxtin_reset(GF_TXTIn *ctx)
2668 {
2669 if (ctx->samp) gf_isom_delete_text_sample(ctx->samp);
2670 ctx->samp = NULL;
2671 if (ctx->src) gf_fclose(ctx->src);
2672 ctx->src = NULL;
2673 if (ctx->vttparser) gf_webvtt_parser_del(ctx->vttparser);
2674 ctx->vttparser = NULL;
2675 if (ctx->parser) gf_xml_dom_del(ctx->parser);
2676 ctx->parser = NULL;
2677 if (ctx->parser_working_copy) gf_xml_dom_del(ctx->parser_working_copy);
2678 ctx->parser_working_copy = NULL;
2679 }
2680
2681 static GF_Err txtin_configure_pid(GF_Filter *filter, GF_FilterPid *pid, Bool is_remove)
2682 {
2683 GF_Err e;
2684 const char *src = NULL;
2685 GF_TXTIn *ctx = gf_filter_get_udta(filter);
2686 const GF_PropertyValue *prop;
2687
2688 if (is_remove) {
2689 ctx->ipid = NULL;
2690 return GF_OK;
2691 }
2692
2693 if (! gf_filter_pid_check_caps(pid))
2694 return GF_NOT_SUPPORTED;
2695
2696 //we must have a file path
2697 prop = gf_filter_pid_get_property(pid, GF_PROP_PID_FILEPATH);
2698 if (prop && prop->value.string) src = prop->value.string;
2699 if (!src)
2700 return GF_NOT_SUPPORTED;
2701
2702 if (!ctx->ipid) {
2703 GF_FilterEvent fevt;
2704 ctx->ipid = pid;
2705
2706 //we work with full file only, send a play event on source to indicate that
2707 GF_FEVT_INIT(fevt, GF_FEVT_PLAY, pid);
2708 fevt.play.start_range = 0;
2709 fevt.base.on_pid = ctx->ipid;
2710 fevt.play.full_file_only = GF_TRUE;
2711 gf_filter_pid_send_event(ctx->ipid, &fevt);
2712 ctx->file_name = src;
2713 } else {
2714 if (pid != ctx->ipid) {
2715 return GF_REQUIRES_NEW_INSTANCE;
2716 }
2717 if (!strcmp(ctx->file_name, src)) return GF_OK;
2718
2719 ttxtin_reset(ctx);
2720 ctx->is_setup = GF_FALSE;
2721 ctx->file_name = src;
2722 }
2723 //guess type
2724 e = gf_text_guess_format(ctx->file_name, &ctx->fmt);
2725 if (e) return e;
2726 if (!ctx->fmt) {
2727 GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[TXTLoad] Unknown text format for %s\n", ctx->file_name));
2728 return GF_NOT_SUPPORTED;
2729 }
2730
2731 if (ctx->webvtt && (ctx->fmt == GF_TXTIN_MODE_SRT))
2732 ctx->fmt = GF_TXTIN_MODE_WEBVTT;
2733
2734 switch (ctx->fmt) {
2735 case GF_TXTIN_MODE_SRT:
2736 ctx->text_process = txtin_process_srt;
2737 break;
2738 #ifndef GPAC_DISABLE_VTT
2739 case GF_TXTIN_MODE_WEBVTT:
2740 ctx->text_process = txtin_process_webvtt;
2741 break;
2742 #endif
2743 case GF_TXTIN_MODE_TTXT:
2744 ctx->text_process = txtin_process_ttxt;
2745 break;
2746 case GF_TXTIN_MODE_TEXML:
2747 ctx->text_process = txtin_process_texml;
2748 break;
2749 case GF_TXTIN_MODE_SUB:
2750 ctx->text_process = gf_text_process_sub;
2751 break;
2752 case GF_TXTIN_MODE_TTML:
2753 ctx->text_process = gf_text_process_ttml;
2754 break;
2755 #ifndef GPAC_DISABLE_SWF_IMPORT
2756 case GF_TXTIN_MODE_SWF_SVG:
2757 ctx->text_process = gf_text_process_swf;
2758 break;
2759 #endif
2760 default:
2761 return GF_BAD_PARAM;
2762 }
2763
2764 return GF_OK;
2765 }
2766
2767 static Bool txtin_process_event(GF_Filter *filter, const GF_FilterEvent *evt)
2768 {
2769 GF_TXTIn *ctx = gf_filter_get_udta(filter);
2770 switch (evt->base.type) {
2771 case GF_FEVT_PLAY:
2772 if (ctx->playstate==1) return GF_TRUE;
2773 ctx->playstate = 1;
2774 if ((ctx->start_range < 0.1) && (evt->play.start_range<0.1)) return GF_TRUE;
2775 ctx->start_range = evt->play.start_range;
2776 ctx->seek_state = 1;
2777 //cancel play event, we work with full file
2778 return GF_TRUE;
2779
2780 case GF_FEVT_STOP:
2781 ctx->playstate = 2;
2782 //cancel play event, we work with full file
2783 return GF_TRUE;
2784 default:
2785 return GF_FALSE;
2786 }
2787 return GF_FALSE;
2788 }
2789
2790 GF_Err txtin_initialize(GF_Filter *filter)
2791 {
2792 char data[1];
2793 GF_TXTIn *ctx = gf_filter_get_udta(filter);
2794 ctx->bs_w = gf_bs_new(data, 1, GF_BITSTREAM_WRITE);
2795 return GF_OK;
2796 }
2797
2798 void txtin_finalize(GF_Filter *filter)
2799 {
2800 GF_TXTIn *ctx = gf_filter_get_udta(filter);
2801
2802 ttxtin_reset(ctx);
2803 if (ctx->bs_w) gf_bs_del(ctx->bs_w);
2804
2805 if (ctx->text_descs) {
2806 while (gf_list_count(ctx->text_descs)) {
2807 GF_PropertyValue *p = gf_list_pop_back(ctx->text_descs);
2808 gf_free(p->value.data.ptr);
2809 gf_free(p);
2810 }
2811 gf_list_del(ctx->text_descs);
2812 }
2813 #ifndef GPAC_DISABLE_SWF_IMPORT
2814 gf_swf_reader_del(ctx->swf_parse);
2815 #endif
2816 }
2817
2818 static const char *txtin_probe_data(const u8 *data, u32 data_size, GF_FilterProbeScore *score)
2819 {
2820 char *dst = NULL;
2821 u8 *res;
2822
2823 res = gf_utf_get_utf8_string_from_bom((char *)data, data_size, &dst);
2824 if (res) data = res;
2825
2826 #define PROBE_OK(_score, _mime) \
2827 *score = _score;\
2828 if (dst) gf_free(dst);\
2829 return _mime; \
2830
2831
2832 if (!strncmp(data, "WEBVTT", 6)) {
2833 PROBE_OK(GF_FPROBE_SUPPORTED, "subtitle/vtt")
2834 }
2835 if (strstr(data, " --> ")) {
2836 PROBE_OK(GF_FPROBE_MAYBE_SUPPORTED, "subtitle/srt")
2837 }
2838 if (!strncmp(data, "FWS", 3) || !strncmp(data, "CWS", 3)) {
2839 PROBE_OK(GF_FPROBE_MAYBE_SUPPORTED, "application/x-shockwave-flash")
2840 }
2841
2842 if ((data[0]=='{') && strstr(data, "}{")) {
2843 PROBE_OK(GF_FPROBE_MAYBE_SUPPORTED, "subtitle/sub")
2844
2845 }
2846 /*XML formats*/
2847 if (!strstr(data, "?>") ) {
2848 if (dst) gf_free(dst);
2849 return NULL;
2850 }
2851
2852 if (strstr(data, "<x-quicktime-tx3g") || strstr(data, "<text3GTrack")) {
2853 PROBE_OK(GF_FPROBE_MAYBE_SUPPORTED, "quicktime/text")
2854 }
2855 if (strstr(data, "TextStream")) {
2856 PROBE_OK(GF_FPROBE_MAYBE_SUPPORTED, "subtitle/ttxt")
2857 }
2858 if (strstr(data, "<tt ") || strstr(data, ":tt ")) {
2859 PROBE_OK(GF_FPROBE_MAYBE_SUPPORTED, "subtitle/ttml")
2860 }
2861
2862 if (dst) gf_free(dst);
2863 return NULL;
2864 }
2865
/*
 * Capabilities of the text import filter.
 * Input: file streams, matched by file extension or by MIME type.
 * Output: text streams, with one of four codec IDs (TX3G, simple text,
 * WebVTT, XML subtitles).
 */
static const GF_FilterCapability TXTInCaps[] =
{
	CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_STREAM_TYPE, GF_STREAM_FILE),
	CAP_STRING(GF_CAPS_INPUT, GF_PROP_PID_FILE_EXT, "srt|ttxt|sub|vtt|txml|ttml|swf"),
	CAP_STRING(GF_CAPS_INPUT, GF_PROP_PID_MIME, "x-subtitle/srt|subtitle/srt|text/srt|x-subtitle/sub|subtitle/sub|text/sub|x-subtitle/ttxt|subtitle/ttxt|text/ttxt|x-subtitle/vtt|subtitle/vtt|text/vtt|x-quicktime/text|quicktime/text|subtitle/ttml|text/ttml|application/xml+ttml|application/x-shockwave-flash"),
	CAP_UINT(GF_CAPS_OUTPUT, GF_PROP_PID_STREAM_TYPE, GF_STREAM_TEXT),
	CAP_UINT(GF_CAPS_OUTPUT, GF_PROP_PID_CODECID, GF_CODECID_TX3G),
	CAP_UINT(GF_CAPS_OUTPUT, GF_PROP_PID_CODECID, GF_CODECID_SIMPLE_TEXT),
	CAP_UINT(GF_CAPS_OUTPUT, GF_PROP_PID_CODECID, GF_CODECID_WEBVTT),
	CAP_UINT(GF_CAPS_OUTPUT, GF_PROP_PID_CODECID, GF_CODECID_SUBS_XML),
};
2877
/* Expands to the option name (as a string) followed by the offset of the
 * same-named field in GF_TXTIn. */
#define OFFS(_n)	#_n, offsetof(GF_TXTIn, _n)

/*
 * Options of the text import filter. Each entry gives, in order: option name
 * and field offset (via OFFS), description, property type, default value, a
 * NULL field and a flags/hint field. The table ends with a zeroed entry.
 */
static const GF_FilterArgs TXTInArgs[] =
{
	{ OFFS(webvtt), "force WebVTT import of SRT files", GF_PROP_BOOL, "false", NULL, GF_FS_ARG_HINT_ADVANCED},
	{ OFFS(nodefbox), "skip default text box", GF_PROP_BOOL, "false", NULL, GF_FS_ARG_HINT_ADVANCED},
	{ OFFS(noflush), "skip final sample flush for srt", GF_PROP_BOOL, "false", NULL, GF_FS_ARG_HINT_ADVANCED},
	{ OFFS(fontname), "default font to use", GF_PROP_STRING, NULL, NULL, 0},
	{ OFFS(fontsize), "default font size", GF_PROP_UINT, "18", NULL, 0},
	{ OFFS(lang), "default language to use", GF_PROP_STRING, NULL, NULL, 0},
	{ OFFS(width), "default width of text area, set to 0 to resolve against visual PIDs", GF_PROP_UINT, "0", NULL, 0},
	{ OFFS(height), "default height of text area, set to 0 to resolve against visual PIDs", GF_PROP_UINT, "0", NULL, 0},
	/* NOTE(review): txtx and txty document -1 as a valid value, but they are
	 * declared GF_PROP_UINT here and stored in u32 fields of GF_TXTIn - confirm
	 * whether a signed type was intended. */
	{ OFFS(txtx), "default horizontal offset of text area: -1 (left), 0 (center) or 1 (right)", GF_PROP_UINT, "0", NULL, 0},
	{ OFFS(txty), "default vertical offset of text area: -1 (bottom), 0 (center) or 1 (top)", GF_PROP_UINT, "0", NULL, 0},
	{ OFFS(zorder), "default z-order of the PID", GF_PROP_SINT, "0", NULL, GF_FS_ARG_HINT_ADVANCED},
	{ OFFS(timescale), "default timescale of the PID", GF_PROP_UINT, "1000", NULL, GF_FS_ARG_HINT_ADVANCED},
	{0}
};
2896
/*
 * Registry entry of the "txtin" filter: ties together its name, description
 * and help text, private context size, options, capabilities and callbacks.
 *
 * NOTE(review): the help text lists srt/webvtt/ttxt/sub only, while TXTInCaps
 * also accepts ttml, txml, QuickTime text and swf inputs - consider aligning
 * the help text.
 */
GF_FilterRegister TXTInRegister = {
	.name = "txtin",
	GF_FS_SET_DESCRIPTION("Subtitle loader")
	GF_FS_SET_HELP("This filter reads subtitle data (srt/webvtt/ttxt/sub) to produce media PIDs and frames.\n"
	"The TTXT documentation is available at https://wiki.gpac.io/TTXT-Format-Documentation\n"
	)

	/* the private context is a GF_TXTIn; callbacks retrieve it through gf_filter_get_udta() */
	.private_size = sizeof(GF_TXTIn),
	/* presumably restricts this filter to the main thread - confirm against the flag's definition */
	.flags = GF_FS_REG_MAIN_THREAD,
	.args = TXTInArgs,
	SETCAPS(TXTInCaps),
	.process = txtin_process,
	.configure_pid = txtin_configure_pid,
	.process_event = txtin_process_event,
	.probe_data = txtin_probe_data,
	.initialize = txtin_initialize,
	.finalize = txtin_finalize
};
2915
2916
/*
 * Returns the registry entry of the text import filter.
 * The session argument is not used.
 */
const GF_FilterRegister *txtin_register(GF_FilterSession *session)
{
	return &TXTInRegister;
}
2921
2922
2923 #else
/*
 * Variant compiled when GPAC_DISABLE_ISOM_WRITE is defined: no text import
 * filter is provided, so NULL is returned. The session argument is not used.
 */
const GF_FilterRegister *txtin_register(GF_FilterSession *session)
{
	return NULL;
}
2928 #endif // GPAC_DISABLE_ISOM_WRITE
2929
2930