1 /* GStreamer
2 * Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
3 * Copyright (C) 2004 Ronald S. Bultje <rbultje@ronald.bitfreak.net>
4 * Copyright (C) 2006 Tim-Philipp Müller <tim centricular net>
5 * Copyright (C) <2015> British Broadcasting Corporation <dash@rd.bbc.co.uk>
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public
18 * License along with this library; if not, write to the
19 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 */
22
23 /**
24 * SECTION:element-ttmlparse
25 * @title: ttmlparse
26 *
27 * Parses timed text subtitle files described using Timed Text Markup Language
28 * (TTML). Currently, only the EBU-TT-D profile of TTML, designed for
29 * distribution of subtitles over IP, is supported.
30 *
31 * The parser outputs a #GstBuffer for each scene in the input TTML file, a
32 * scene being a period of time during which a static set of subtitles should
33 * be visible. The parser places each text element within a scene into its own
34 * #GstMemory within the scene's buffer, and attaches metadata to the buffer
35 * describing the styling and layout associated with all the contained text
36 * elements. A downstream renderer element uses this information to correctly
37 * render the text on top of video frames.
38 *
39 * ## Example launch lines
40 * |[
41 * gst-launch-1.0 filesrc location=<media file location> ! video/quicktime ! qtdemux name=q ttmlrender name=r q. ! queue ! h264parse ! avdec_h264 ! autovideoconvert ! r.video_sink filesrc location=<subtitle file location> blocksize=16777216 ! queue ! ttmlparse ! r.text_sink r. ! ximagesink q. ! queue ! aacparse ! avdec_aac ! audioconvert ! alsasink
42 * ]| Parse and render TTML subtitles contained in a single XML file over an
43 * MP4 stream containing H.264 video and AAC audio.
44 *
45 */
46
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <sys/types.h>
51 #include <glib.h>
52
53 #include "gstttmlparse.h"
54 #include "ttmlparse.h"
55
/* Debug category used by all GST_* logging macros in this file; it is only
 * declared here (extern), not defined. */
GST_DEBUG_CATEGORY_EXTERN (ttmlparse_debug);
#define GST_CAT_DEFAULT ttmlparse_debug

/* Value copied into GstTtmlParse::encoding when an instance is initialised. */
#define DEFAULT_ENCODING NULL

/* Always-present sink pad: accepts TTML documents. */
static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("application/ttml+xml")
    );

/* Always-present source pad: raw text carrying the GstSubtitleMeta caps
 * feature. */
static GstStaticPadTemplate src_templ = GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("text/x-raw(meta:GstSubtitleMeta)")
    );

/* Pad callbacks installed in gst_ttml_parse_init(). */
static gboolean gst_ttml_parse_src_event (GstPad * pad, GstObject * parent,
    GstEvent * event);
static gboolean gst_ttml_parse_src_query (GstPad * pad, GstObject * parent,
    GstQuery * query);
static gboolean gst_ttml_parse_sink_event (GstPad * pad, GstObject * parent,
    GstEvent * event);

/* GstElement vfunc installed in gst_ttml_parse_class_init(). */
static GstStateChangeReturn gst_ttml_parse_change_state (GstElement * element,
    GstStateChange transition);

/* Chain function of the sink pad. */
static GstFlowReturn gst_ttml_parse_chain (GstPad * sinkpad, GstObject * parent,
    GstBuffer * buf);

/* parent_class is the name used below when chaining up to the parent class. */
#define gst_ttml_parse_parent_class parent_class
G_DEFINE_TYPE (GstTtmlParse, gst_ttml_parse, GST_TYPE_ELEMENT);
88
89 static void
gst_ttml_parse_dispose(GObject * object)90 gst_ttml_parse_dispose (GObject * object)
91 {
92 GstTtmlParse *ttmlparse = GST_TTML_PARSE (object);
93
94 GST_DEBUG_OBJECT (ttmlparse, "cleaning up subtitle parser");
95
96 g_free (ttmlparse->encoding);
97 ttmlparse->encoding = NULL;
98
99 g_free (ttmlparse->detected_encoding);
100 ttmlparse->detected_encoding = NULL;
101
102 if (ttmlparse->adapter) {
103 g_object_unref (ttmlparse->adapter);
104 ttmlparse->adapter = NULL;
105 }
106
107 if (ttmlparse->textbuf) {
108 g_string_free (ttmlparse->textbuf, TRUE);
109 ttmlparse->textbuf = NULL;
110 }
111
112 GST_CALL_PARENT (G_OBJECT_CLASS, dispose, (object));
113 }
114
115 static void
gst_ttml_parse_class_init(GstTtmlParseClass * klass)116 gst_ttml_parse_class_init (GstTtmlParseClass * klass)
117 {
118 GObjectClass *object_class = G_OBJECT_CLASS (klass);
119 GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
120
121 object_class->dispose = gst_ttml_parse_dispose;
122
123 gst_element_class_add_pad_template (element_class,
124 gst_static_pad_template_get (&sink_templ));
125 gst_element_class_add_pad_template (element_class,
126 gst_static_pad_template_get (&src_templ));
127 gst_element_class_set_static_metadata (element_class,
128 "TTML subtitle parser", "Codec/Parser/Subtitle",
129 "Parses TTML subtitle files",
130 "GStreamer maintainers <gstreamer-devel@lists.sourceforge.net>, "
131 "Chris Bass <dash@rd.bbc.co.uk>");
132
133 element_class->change_state = gst_ttml_parse_change_state;
134 }
135
136 static void
gst_ttml_parse_init(GstTtmlParse * ttmlparse)137 gst_ttml_parse_init (GstTtmlParse * ttmlparse)
138 {
139 ttmlparse->sinkpad = gst_pad_new_from_static_template (&sink_templ, "sink");
140 gst_pad_set_chain_function (ttmlparse->sinkpad,
141 GST_DEBUG_FUNCPTR (gst_ttml_parse_chain));
142 gst_pad_set_event_function (ttmlparse->sinkpad,
143 GST_DEBUG_FUNCPTR (gst_ttml_parse_sink_event));
144 gst_element_add_pad (GST_ELEMENT (ttmlparse), ttmlparse->sinkpad);
145
146 ttmlparse->srcpad = gst_pad_new_from_static_template (&src_templ, "src");
147 gst_pad_set_event_function (ttmlparse->srcpad,
148 GST_DEBUG_FUNCPTR (gst_ttml_parse_src_event));
149 gst_pad_set_query_function (ttmlparse->srcpad,
150 GST_DEBUG_FUNCPTR (gst_ttml_parse_src_query));
151 gst_element_add_pad (GST_ELEMENT (ttmlparse), ttmlparse->srcpad);
152
153 ttmlparse->textbuf = g_string_new (NULL);
154 gst_segment_init (&ttmlparse->segment, GST_FORMAT_TIME);
155 ttmlparse->need_segment = TRUE;
156 ttmlparse->encoding = g_strdup (DEFAULT_ENCODING);
157 ttmlparse->detected_encoding = NULL;
158 ttmlparse->adapter = gst_adapter_new ();
159 }
160
161 /*
162 * Source pad functions.
163 */
164 static gboolean
gst_ttml_parse_src_query(GstPad * pad,GstObject * parent,GstQuery * query)165 gst_ttml_parse_src_query (GstPad * pad, GstObject * parent, GstQuery * query)
166 {
167 GstTtmlParse *self = GST_TTML_PARSE (parent);
168 gboolean ret = FALSE;
169
170 GST_DEBUG ("Handling %s query", GST_QUERY_TYPE_NAME (query));
171
172 switch (GST_QUERY_TYPE (query)) {
173 case GST_QUERY_POSITION:{
174 GstFormat fmt;
175
176 gst_query_parse_position (query, &fmt, NULL);
177 if (fmt != GST_FORMAT_TIME) {
178 ret = gst_pad_peer_query (self->sinkpad, query);
179 } else {
180 ret = TRUE;
181 gst_query_set_position (query, GST_FORMAT_TIME, self->segment.position);
182 }
183 break;
184 }
185 case GST_QUERY_SEEKING:
186 {
187 GstFormat fmt;
188 gboolean seekable = FALSE;
189
190 ret = TRUE;
191
192 gst_query_parse_seeking (query, &fmt, NULL, NULL, NULL);
193 if (fmt == GST_FORMAT_TIME) {
194 GstQuery *peerquery = gst_query_new_seeking (GST_FORMAT_BYTES);
195
196 seekable = gst_pad_peer_query (self->sinkpad, peerquery);
197 if (seekable)
198 gst_query_parse_seeking (peerquery, NULL, &seekable, NULL, NULL);
199 gst_query_unref (peerquery);
200 }
201
202 gst_query_set_seeking (query, fmt, seekable, seekable ? 0 : -1, -1);
203 break;
204 }
205 default:
206 ret = gst_pad_query_default (pad, parent, query);
207 break;
208 }
209
210 return ret;
211 }
212
213 static gboolean
gst_ttml_parse_src_event(GstPad * pad,GstObject * parent,GstEvent * event)214 gst_ttml_parse_src_event (GstPad * pad, GstObject * parent, GstEvent * event)
215 {
216 GstTtmlParse *self = GST_TTML_PARSE (parent);
217 gboolean ret = FALSE;
218
219 GST_DEBUG ("Handling %s event", GST_EVENT_TYPE_NAME (event));
220
221 switch (GST_EVENT_TYPE (event)) {
222 case GST_EVENT_SEEK:
223 {
224 GstFormat format;
225 GstSeekFlags flags;
226 GstSeekType start_type, stop_type;
227 gint64 start, stop;
228 gdouble rate;
229 gboolean update;
230
231 gst_event_parse_seek (event, &rate, &format, &flags,
232 &start_type, &start, &stop_type, &stop);
233
234 if (format != GST_FORMAT_TIME) {
235 GST_WARNING_OBJECT (self, "we only support seeking in TIME format");
236 gst_event_unref (event);
237 goto beach;
238 }
239
240 /* Convert that seek to a seeking in bytes at position 0,
241 FIXME: could use an index */
242 ret = gst_pad_push_event (self->sinkpad,
243 gst_event_new_seek (rate, GST_FORMAT_BYTES, flags,
244 GST_SEEK_TYPE_SET, 0, GST_SEEK_TYPE_NONE, 0));
245
246 if (ret) {
247 /* Apply the seek to our segment */
248 gst_segment_do_seek (&self->segment, rate, format, flags,
249 start_type, start, stop_type, stop, &update);
250
251 GST_DEBUG_OBJECT (self, "segment after seek: %" GST_SEGMENT_FORMAT,
252 &self->segment);
253
254 self->need_segment = TRUE;
255 } else {
256 GST_WARNING_OBJECT (self, "seek to 0 bytes failed");
257 }
258
259 gst_event_unref (event);
260 break;
261 }
262 default:
263 ret = gst_pad_event_default (pad, parent, event);
264 break;
265 }
266
267 beach:
268 return ret;
269 }
270
271 static gchar *
gst_convert_to_utf8(const gchar * str,gsize len,const gchar * encoding,gsize * consumed,GError ** err)272 gst_convert_to_utf8 (const gchar * str, gsize len, const gchar * encoding,
273 gsize * consumed, GError ** err)
274 {
275 gchar *ret = NULL;
276
277 *consumed = 0;
278 /* The char cast is necessary in glib < 2.24 */
279 ret =
280 g_convert_with_fallback (str, len, "UTF-8", encoding, (char *) "*",
281 consumed, NULL, err);
282 if (ret == NULL)
283 return ret;
284
285 /* + 3 to skip UTF-8 BOM if it was added */
286 len = strlen (ret);
287 if (len >= 3 && (guint8) ret[0] == 0xEF && (guint8) ret[1] == 0xBB
288 && (guint8) ret[2] == 0xBF)
289 memmove (ret, ret + 3, len + 1 - 3);
290
291 return ret;
292 }
293
294 static gchar *
detect_encoding(const gchar * str,gsize len)295 detect_encoding (const gchar * str, gsize len)
296 {
297 if (len >= 3 && (guint8) str[0] == 0xEF && (guint8) str[1] == 0xBB
298 && (guint8) str[2] == 0xBF)
299 return g_strdup ("UTF-8");
300
301 if (len >= 2 && (guint8) str[0] == 0xFE && (guint8) str[1] == 0xFF)
302 return g_strdup ("UTF-16BE");
303
304 if (len >= 2 && (guint8) str[0] == 0xFF && (guint8) str[1] == 0xFE)
305 return g_strdup ("UTF-16LE");
306
307 if (len >= 4 && (guint8) str[0] == 0x00 && (guint8) str[1] == 0x00
308 && (guint8) str[2] == 0xFE && (guint8) str[3] == 0xFF)
309 return g_strdup ("UTF-32BE");
310
311 if (len >= 4 && (guint8) str[0] == 0xFF && (guint8) str[1] == 0xFE
312 && (guint8) str[2] == 0x00 && (guint8) str[3] == 0x00)
313 return g_strdup ("UTF-32LE");
314
315 return NULL;
316 }
317
318 static gchar *
convert_encoding(GstTtmlParse * self,const gchar * str,gsize len,gsize * consumed)319 convert_encoding (GstTtmlParse * self, const gchar * str, gsize len,
320 gsize * consumed)
321 {
322 const gchar *encoding;
323 GError *err = NULL;
324 gchar *ret = NULL;
325
326 *consumed = 0;
327
328 /* First try any detected encoding */
329 if (self->detected_encoding) {
330 ret =
331 gst_convert_to_utf8 (str, len, self->detected_encoding, consumed, &err);
332
333 if (!err)
334 return ret;
335
336 GST_WARNING_OBJECT (self, "could not convert string from '%s' to UTF-8: %s",
337 self->detected_encoding, err->message);
338 g_free (self->detected_encoding);
339 self->detected_encoding = NULL;
340 g_error_free (err);
341 }
342
343 /* Otherwise check if it's UTF8 */
344 if (self->valid_utf8) {
345 if (g_utf8_validate (str, len, NULL)) {
346 GST_LOG_OBJECT (self, "valid UTF-8, no conversion needed");
347 *consumed = len;
348 return g_strndup (str, len);
349 }
350 GST_INFO_OBJECT (self, "invalid UTF-8!");
351 self->valid_utf8 = FALSE;
352 }
353
354 /* Else try fallback */
355 encoding = self->encoding;
356 if (encoding == NULL || *encoding == '\0') {
357 /* if local encoding is UTF-8 and no encoding specified
358 * via the environment variable, assume ISO-8859-15 */
359 if (g_get_charset (&encoding)) {
360 encoding = "ISO-8859-15";
361 }
362 }
363
364 ret = gst_convert_to_utf8 (str, len, encoding, consumed, &err);
365
366 if (err) {
367 GST_WARNING_OBJECT (self, "could not convert string from '%s' to UTF-8: %s",
368 encoding, err->message);
369 g_error_free (err);
370
371 /* invalid input encoding, fall back to ISO-8859-15 (always succeeds) */
372 ret = gst_convert_to_utf8 (str, len, "ISO-8859-15", consumed, NULL);
373 }
374
375 GST_LOG_OBJECT (self,
376 "successfully converted %" G_GSIZE_FORMAT " characters from %s to UTF-8"
377 "%s", len, encoding, (err) ? " , using ISO-8859-15 as fallback" : "");
378
379 return ret;
380 }
381
382 static GstCaps *
gst_ttml_parse_get_src_caps(GstTtmlParse * self)383 gst_ttml_parse_get_src_caps (GstTtmlParse * self)
384 {
385 GstCaps *caps;
386 GstCapsFeatures *features = gst_caps_features_new ("meta:GstSubtitleMeta",
387 NULL);
388
389 caps = gst_caps_new_empty_simple ("text/x-raw");
390 gst_caps_set_features (caps, 0, features);
391 return caps;
392 }
393
394 static void
feed_textbuf(GstTtmlParse * self,GstBuffer * buf)395 feed_textbuf (GstTtmlParse * self, GstBuffer * buf)
396 {
397 gboolean discont;
398 gsize consumed;
399 gchar *input = NULL;
400 const guint8 *data;
401 gsize avail;
402
403 discont = GST_BUFFER_IS_DISCONT (buf);
404
405 if (GST_BUFFER_OFFSET_IS_VALID (buf) &&
406 GST_BUFFER_OFFSET (buf) != self->offset) {
407 self->offset = GST_BUFFER_OFFSET (buf);
408 discont = TRUE;
409 }
410
411 if (discont) {
412 GST_INFO ("discontinuity");
413 /* flush the parser state */
414 g_string_truncate (self->textbuf, 0);
415 gst_adapter_clear (self->adapter);
416 /* we could set a flag to make sure that the next buffer we push out also
417 * has the DISCONT flag set, but there's no point really given that it's
418 * subtitles which are discontinuous by nature. */
419 }
420
421 self->offset += gst_buffer_get_size (buf);
422
423 gst_adapter_push (self->adapter, buf);
424
425 avail = gst_adapter_available (self->adapter);
426 data = gst_adapter_map (self->adapter, avail);
427 input = convert_encoding (self, (const gchar *) data, avail, &consumed);
428
429 if (input && consumed > 0) {
430 if (self->textbuf) {
431 g_string_free (self->textbuf, TRUE);
432 self->textbuf = NULL;
433 }
434 self->textbuf = g_string_new (input);
435 gst_adapter_unmap (self->adapter);
436 gst_adapter_flush (self->adapter, consumed);
437 } else {
438 gst_adapter_unmap (self->adapter);
439 }
440
441 g_free (input);
442 }
443
444 static GstFlowReturn
handle_buffer(GstTtmlParse * self,GstBuffer * buf)445 handle_buffer (GstTtmlParse * self, GstBuffer * buf)
446 {
447 GstFlowReturn ret = GST_FLOW_OK;
448 GstCaps *caps = NULL;
449 GList *subtitle_list, *subtitle;
450 GstClockTime begin = GST_BUFFER_PTS (buf);
451 GstClockTime duration = GST_BUFFER_DURATION (buf);
452
453 if (self->first_buffer) {
454 GstMapInfo map;
455
456 gst_buffer_map (buf, &map, GST_MAP_READ);
457 self->detected_encoding = detect_encoding ((gchar *) map.data, map.size);
458 gst_buffer_unmap (buf, &map);
459 self->first_buffer = FALSE;
460 }
461
462 feed_textbuf (self, buf);
463
464 if (!(caps = gst_ttml_parse_get_src_caps (self)))
465 return GST_FLOW_EOS;
466 gst_caps_unref (caps);
467
468 /* Push newsegment if needed */
469 if (self->need_segment) {
470 GST_LOG_OBJECT (self, "pushing newsegment event with %" GST_SEGMENT_FORMAT,
471 &self->segment);
472
473 gst_pad_push_event (self->srcpad, gst_event_new_segment (&self->segment));
474 self->need_segment = FALSE;
475 }
476
477 subtitle_list = ttml_parse (self->textbuf->str, begin, duration);
478
479 for (subtitle = subtitle_list; subtitle; subtitle = subtitle->next) {
480 GstBuffer *op_buffer = subtitle->data;
481 self->segment.position = GST_BUFFER_PTS (op_buffer);
482
483 ret = gst_pad_push (self->srcpad, op_buffer);
484
485 if (ret != GST_FLOW_OK)
486 GST_DEBUG_OBJECT (self, "flow: %s", gst_flow_get_name (ret));
487 }
488
489 g_list_free (subtitle_list);
490 return ret;
491 }
492
493 static GstFlowReturn
gst_ttml_parse_chain(GstPad * sinkpad,GstObject * parent,GstBuffer * buf)494 gst_ttml_parse_chain (GstPad * sinkpad, GstObject * parent, GstBuffer * buf)
495 {
496 GstTtmlParse *self = GST_TTML_PARSE (parent);
497 return handle_buffer (self, buf);
498 }
499
500 static gboolean
gst_ttml_parse_sink_event(GstPad * pad,GstObject * parent,GstEvent * event)501 gst_ttml_parse_sink_event (GstPad * pad, GstObject * parent, GstEvent * event)
502 {
503 GstTtmlParse *self = GST_TTML_PARSE (parent);
504 gboolean ret = FALSE;
505
506 GST_DEBUG ("Handling %s event", GST_EVENT_TYPE_NAME (event));
507
508 switch (GST_EVENT_TYPE (event)) {
509 case GST_EVENT_SEGMENT:
510 {
511 const GstSegment *s;
512 gst_event_parse_segment (event, &s);
513 if (s->format == GST_FORMAT_TIME)
514 gst_event_copy_segment (event, &self->segment);
515 GST_DEBUG_OBJECT (self, "newsegment (%s)",
516 gst_format_get_name (self->segment.format));
517
518 /* if not time format, we'll either start with a 0 timestamp anyway or
519 * it's following a seek in which case we'll have saved the requested
520 * seek segment and don't want to overwrite it (remember that on a seek
521 * we always just seek back to the start in BYTES format and just throw
522 * away all text that's before the requested position; if the subtitles
523 * come from an upstream demuxer, it won't be able to handle our BYTES
524 * seek request and instead send us a newsegment from the seek request
525 * it received via its video pads instead, so all is fine then too) */
526 ret = TRUE;
527 self->need_segment = TRUE;
528 gst_event_unref (event);
529 break;
530 }
531 case GST_EVENT_CAPS:
532 {
533 GstCaps *caps;
534 gst_event_unref (event);
535
536 caps = gst_ttml_parse_get_src_caps (self);
537 event = gst_event_new_caps (caps);
538 gst_caps_unref (caps);
539
540 ret = gst_pad_push_event (self->srcpad, event);
541 break;
542 }
543 default:
544 ret = gst_pad_event_default (pad, parent, event);
545 break;
546 }
547
548 return ret;
549 }
550
551 static GstStateChangeReturn
gst_ttml_parse_change_state(GstElement * element,GstStateChange transition)552 gst_ttml_parse_change_state (GstElement * element, GstStateChange transition)
553 {
554 GstStateChangeReturn ret = GST_STATE_CHANGE_SUCCESS;
555 GstTtmlParse *self = GST_TTML_PARSE (element);
556
557 switch (transition) {
558 case GST_STATE_CHANGE_READY_TO_PAUSED:
559 /* format detection will init the parser state */
560 self->offset = 0;
561 self->valid_utf8 = TRUE;
562 self->first_buffer = TRUE;
563 g_free (self->detected_encoding);
564 self->detected_encoding = NULL;
565 g_string_truncate (self->textbuf, 0);
566 gst_adapter_clear (self->adapter);
567 break;
568 default:
569 break;
570 }
571
572 ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
573 if (ret == GST_STATE_CHANGE_FAILURE)
574 return ret;
575
576 switch (transition) {
577 case GST_STATE_CHANGE_PAUSED_TO_READY:
578 break;
579 default:
580 break;
581 }
582
583 return ret;
584 }
585