1 /* GStreamer
2  * Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
3  * Copyright (C) 2004 Ronald S. Bultje <rbultje@ronald.bitfreak.net>
4  * Copyright (C) 2006 Tim-Philipp Müller <tim centricular net>
5  * Copyright (C) <2015> British Broadcasting Corporation <dash@rd.bbc.co.uk>
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Library General Public
9  * License as published by the Free Software Foundation; either
10  * version 2 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Library General Public License for more details.
16  *
17  * You should have received a copy of the GNU Library General Public
18  * License along with this library; if not, write to the
19  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /**
24  * SECTION:element-ttmlparse
25  * @title: ttmlparse
26  *
27  * Parses timed text subtitle files described using Timed Text Markup Language
28  * (TTML). Currently, only the EBU-TT-D profile of TTML, designed for
29  * distribution of subtitles over IP, is supported.
30  *
31  * The parser outputs a #GstBuffer for each scene in the input TTML file, a
32  * scene being a period of time during which a static set of subtitles should
33  * be visible. The parser places each text element within a scene into its own
34  * #GstMemory within the scene's buffer, and attaches metadata to the buffer
35  * describing the styling and layout associated with all the contained text
36  * elements. A downstream renderer element uses this information to correctly
37  * render the text on top of video frames.
38  *
39  * ## Example launch lines
40  * |[
41  * gst-launch-1.0 filesrc location=<media file location> ! video/quicktime ! qtdemux name=q ttmlrender name=r q. ! queue ! h264parse ! avdec_h264 ! autovideoconvert ! r.video_sink filesrc location=<subtitle file location> blocksize=16777216 ! queue ! ttmlparse ! r.text_sink r. ! ximagesink q. ! queue ! aacparse ! avdec_aac ! audioconvert ! alsasink
42  * ]| Parse and render TTML subtitles contained in a single XML file over an
43  * MP4 stream containing H.264 video and AAC audio.
44  *
45  */
46 
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <sys/types.h>
51 #include <glib.h>
52 
53 #include "gstttmlparse.h"
54 #include "ttmlparse.h"
55 
/* The debug category is only declared here (extern); logging macros in this
 * file default to it through GST_CAT_DEFAULT. */
GST_DEBUG_CATEGORY_EXTERN (ttmlparse_debug);
#define GST_CAT_DEFAULT ttmlparse_debug

/* Initial value of the element's fallback encoding: none. */
#define DEFAULT_ENCODING   NULL
60 
61 static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink",
62     GST_PAD_SINK,
63     GST_PAD_ALWAYS,
64     GST_STATIC_CAPS ("application/ttml+xml")
65     );
66 
67 static GstStaticPadTemplate src_templ = GST_STATIC_PAD_TEMPLATE ("src",
68     GST_PAD_SRC,
69     GST_PAD_ALWAYS,
70     GST_STATIC_CAPS ("text/x-raw(meta:GstSubtitleMeta)")
71     );
72 
73 static gboolean gst_ttml_parse_src_event (GstPad * pad, GstObject * parent,
74     GstEvent * event);
75 static gboolean gst_ttml_parse_src_query (GstPad * pad, GstObject * parent,
76     GstQuery * query);
77 static gboolean gst_ttml_parse_sink_event (GstPad * pad, GstObject * parent,
78     GstEvent * event);
79 
80 static GstStateChangeReturn gst_ttml_parse_change_state (GstElement * element,
81     GstStateChange transition);
82 
83 static GstFlowReturn gst_ttml_parse_chain (GstPad * sinkpad, GstObject * parent,
84     GstBuffer * buf);
85 
86 #define gst_ttml_parse_parent_class parent_class
87 G_DEFINE_TYPE (GstTtmlParse, gst_ttml_parse, GST_TYPE_ELEMENT);
88 
89 static void
gst_ttml_parse_dispose(GObject * object)90 gst_ttml_parse_dispose (GObject * object)
91 {
92   GstTtmlParse *ttmlparse = GST_TTML_PARSE (object);
93 
94   GST_DEBUG_OBJECT (ttmlparse, "cleaning up subtitle parser");
95 
96   g_free (ttmlparse->encoding);
97   ttmlparse->encoding = NULL;
98 
99   g_free (ttmlparse->detected_encoding);
100   ttmlparse->detected_encoding = NULL;
101 
102   if (ttmlparse->adapter) {
103     g_object_unref (ttmlparse->adapter);
104     ttmlparse->adapter = NULL;
105   }
106 
107   if (ttmlparse->textbuf) {
108     g_string_free (ttmlparse->textbuf, TRUE);
109     ttmlparse->textbuf = NULL;
110   }
111 
112   GST_CALL_PARENT (G_OBJECT_CLASS, dispose, (object));
113 }
114 
115 static void
gst_ttml_parse_class_init(GstTtmlParseClass * klass)116 gst_ttml_parse_class_init (GstTtmlParseClass * klass)
117 {
118   GObjectClass *object_class = G_OBJECT_CLASS (klass);
119   GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
120 
121   object_class->dispose = gst_ttml_parse_dispose;
122 
123   gst_element_class_add_pad_template (element_class,
124       gst_static_pad_template_get (&sink_templ));
125   gst_element_class_add_pad_template (element_class,
126       gst_static_pad_template_get (&src_templ));
127   gst_element_class_set_static_metadata (element_class,
128       "TTML subtitle parser", "Codec/Parser/Subtitle",
129       "Parses TTML subtitle files",
130       "GStreamer maintainers <gstreamer-devel@lists.sourceforge.net>, "
131       "Chris Bass <dash@rd.bbc.co.uk>");
132 
133   element_class->change_state = gst_ttml_parse_change_state;
134 }
135 
136 static void
gst_ttml_parse_init(GstTtmlParse * ttmlparse)137 gst_ttml_parse_init (GstTtmlParse * ttmlparse)
138 {
139   ttmlparse->sinkpad = gst_pad_new_from_static_template (&sink_templ, "sink");
140   gst_pad_set_chain_function (ttmlparse->sinkpad,
141       GST_DEBUG_FUNCPTR (gst_ttml_parse_chain));
142   gst_pad_set_event_function (ttmlparse->sinkpad,
143       GST_DEBUG_FUNCPTR (gst_ttml_parse_sink_event));
144   gst_element_add_pad (GST_ELEMENT (ttmlparse), ttmlparse->sinkpad);
145 
146   ttmlparse->srcpad = gst_pad_new_from_static_template (&src_templ, "src");
147   gst_pad_set_event_function (ttmlparse->srcpad,
148       GST_DEBUG_FUNCPTR (gst_ttml_parse_src_event));
149   gst_pad_set_query_function (ttmlparse->srcpad,
150       GST_DEBUG_FUNCPTR (gst_ttml_parse_src_query));
151   gst_element_add_pad (GST_ELEMENT (ttmlparse), ttmlparse->srcpad);
152 
153   ttmlparse->textbuf = g_string_new (NULL);
154   gst_segment_init (&ttmlparse->segment, GST_FORMAT_TIME);
155   ttmlparse->need_segment = TRUE;
156   ttmlparse->encoding = g_strdup (DEFAULT_ENCODING);
157   ttmlparse->detected_encoding = NULL;
158   ttmlparse->adapter = gst_adapter_new ();
159 }
160 
161 /*
162  * Source pad functions.
163  */
164 static gboolean
gst_ttml_parse_src_query(GstPad * pad,GstObject * parent,GstQuery * query)165 gst_ttml_parse_src_query (GstPad * pad, GstObject * parent, GstQuery * query)
166 {
167   GstTtmlParse *self = GST_TTML_PARSE (parent);
168   gboolean ret = FALSE;
169 
170   GST_DEBUG ("Handling %s query", GST_QUERY_TYPE_NAME (query));
171 
172   switch (GST_QUERY_TYPE (query)) {
173     case GST_QUERY_POSITION:{
174       GstFormat fmt;
175 
176       gst_query_parse_position (query, &fmt, NULL);
177       if (fmt != GST_FORMAT_TIME) {
178         ret = gst_pad_peer_query (self->sinkpad, query);
179       } else {
180         ret = TRUE;
181         gst_query_set_position (query, GST_FORMAT_TIME, self->segment.position);
182       }
183       break;
184     }
185     case GST_QUERY_SEEKING:
186     {
187       GstFormat fmt;
188       gboolean seekable = FALSE;
189 
190       ret = TRUE;
191 
192       gst_query_parse_seeking (query, &fmt, NULL, NULL, NULL);
193       if (fmt == GST_FORMAT_TIME) {
194         GstQuery *peerquery = gst_query_new_seeking (GST_FORMAT_BYTES);
195 
196         seekable = gst_pad_peer_query (self->sinkpad, peerquery);
197         if (seekable)
198           gst_query_parse_seeking (peerquery, NULL, &seekable, NULL, NULL);
199         gst_query_unref (peerquery);
200       }
201 
202       gst_query_set_seeking (query, fmt, seekable, seekable ? 0 : -1, -1);
203       break;
204     }
205     default:
206       ret = gst_pad_query_default (pad, parent, query);
207       break;
208   }
209 
210   return ret;
211 }
212 
213 static gboolean
gst_ttml_parse_src_event(GstPad * pad,GstObject * parent,GstEvent * event)214 gst_ttml_parse_src_event (GstPad * pad, GstObject * parent, GstEvent * event)
215 {
216   GstTtmlParse *self = GST_TTML_PARSE (parent);
217   gboolean ret = FALSE;
218 
219   GST_DEBUG ("Handling %s event", GST_EVENT_TYPE_NAME (event));
220 
221   switch (GST_EVENT_TYPE (event)) {
222     case GST_EVENT_SEEK:
223     {
224       GstFormat format;
225       GstSeekFlags flags;
226       GstSeekType start_type, stop_type;
227       gint64 start, stop;
228       gdouble rate;
229       gboolean update;
230 
231       gst_event_parse_seek (event, &rate, &format, &flags,
232           &start_type, &start, &stop_type, &stop);
233 
234       if (format != GST_FORMAT_TIME) {
235         GST_WARNING_OBJECT (self, "we only support seeking in TIME format");
236         gst_event_unref (event);
237         goto beach;
238       }
239 
240       /* Convert that seek to a seeking in bytes at position 0,
241          FIXME: could use an index */
242       ret = gst_pad_push_event (self->sinkpad,
243           gst_event_new_seek (rate, GST_FORMAT_BYTES, flags,
244               GST_SEEK_TYPE_SET, 0, GST_SEEK_TYPE_NONE, 0));
245 
246       if (ret) {
247         /* Apply the seek to our segment */
248         gst_segment_do_seek (&self->segment, rate, format, flags,
249             start_type, start, stop_type, stop, &update);
250 
251         GST_DEBUG_OBJECT (self, "segment after seek: %" GST_SEGMENT_FORMAT,
252             &self->segment);
253 
254         self->need_segment = TRUE;
255       } else {
256         GST_WARNING_OBJECT (self, "seek to 0 bytes failed");
257       }
258 
259       gst_event_unref (event);
260       break;
261     }
262     default:
263       ret = gst_pad_event_default (pad, parent, event);
264       break;
265   }
266 
267 beach:
268   return ret;
269 }
270 
271 static gchar *
gst_convert_to_utf8(const gchar * str,gsize len,const gchar * encoding,gsize * consumed,GError ** err)272 gst_convert_to_utf8 (const gchar * str, gsize len, const gchar * encoding,
273     gsize * consumed, GError ** err)
274 {
275   gchar *ret = NULL;
276 
277   *consumed = 0;
278   /* The char cast is necessary in glib < 2.24 */
279   ret =
280       g_convert_with_fallback (str, len, "UTF-8", encoding, (char *) "*",
281       consumed, NULL, err);
282   if (ret == NULL)
283     return ret;
284 
285   /* + 3 to skip UTF-8 BOM if it was added */
286   len = strlen (ret);
287   if (len >= 3 && (guint8) ret[0] == 0xEF && (guint8) ret[1] == 0xBB
288       && (guint8) ret[2] == 0xBF)
289     memmove (ret, ret + 3, len + 1 - 3);
290 
291   return ret;
292 }
293 
294 static gchar *
detect_encoding(const gchar * str,gsize len)295 detect_encoding (const gchar * str, gsize len)
296 {
297   if (len >= 3 && (guint8) str[0] == 0xEF && (guint8) str[1] == 0xBB
298       && (guint8) str[2] == 0xBF)
299     return g_strdup ("UTF-8");
300 
301   if (len >= 2 && (guint8) str[0] == 0xFE && (guint8) str[1] == 0xFF)
302     return g_strdup ("UTF-16BE");
303 
304   if (len >= 2 && (guint8) str[0] == 0xFF && (guint8) str[1] == 0xFE)
305     return g_strdup ("UTF-16LE");
306 
307   if (len >= 4 && (guint8) str[0] == 0x00 && (guint8) str[1] == 0x00
308       && (guint8) str[2] == 0xFE && (guint8) str[3] == 0xFF)
309     return g_strdup ("UTF-32BE");
310 
311   if (len >= 4 && (guint8) str[0] == 0xFF && (guint8) str[1] == 0xFE
312       && (guint8) str[2] == 0x00 && (guint8) str[3] == 0x00)
313     return g_strdup ("UTF-32LE");
314 
315   return NULL;
316 }
317 
318 static gchar *
convert_encoding(GstTtmlParse * self,const gchar * str,gsize len,gsize * consumed)319 convert_encoding (GstTtmlParse * self, const gchar * str, gsize len,
320     gsize * consumed)
321 {
322   const gchar *encoding;
323   GError *err = NULL;
324   gchar *ret = NULL;
325 
326   *consumed = 0;
327 
328   /* First try any detected encoding */
329   if (self->detected_encoding) {
330     ret =
331         gst_convert_to_utf8 (str, len, self->detected_encoding, consumed, &err);
332 
333     if (!err)
334       return ret;
335 
336     GST_WARNING_OBJECT (self, "could not convert string from '%s' to UTF-8: %s",
337         self->detected_encoding, err->message);
338     g_free (self->detected_encoding);
339     self->detected_encoding = NULL;
340     g_error_free (err);
341   }
342 
343   /* Otherwise check if it's UTF8 */
344   if (self->valid_utf8) {
345     if (g_utf8_validate (str, len, NULL)) {
346       GST_LOG_OBJECT (self, "valid UTF-8, no conversion needed");
347       *consumed = len;
348       return g_strndup (str, len);
349     }
350     GST_INFO_OBJECT (self, "invalid UTF-8!");
351     self->valid_utf8 = FALSE;
352   }
353 
354   /* Else try fallback */
355   encoding = self->encoding;
356   if (encoding == NULL || *encoding == '\0') {
357     /* if local encoding is UTF-8 and no encoding specified
358      * via the environment variable, assume ISO-8859-15 */
359     if (g_get_charset (&encoding)) {
360       encoding = "ISO-8859-15";
361     }
362   }
363 
364   ret = gst_convert_to_utf8 (str, len, encoding, consumed, &err);
365 
366   if (err) {
367     GST_WARNING_OBJECT (self, "could not convert string from '%s' to UTF-8: %s",
368         encoding, err->message);
369     g_error_free (err);
370 
371     /* invalid input encoding, fall back to ISO-8859-15 (always succeeds) */
372     ret = gst_convert_to_utf8 (str, len, "ISO-8859-15", consumed, NULL);
373   }
374 
375   GST_LOG_OBJECT (self,
376       "successfully converted %" G_GSIZE_FORMAT " characters from %s to UTF-8"
377       "%s", len, encoding, (err) ? " , using ISO-8859-15 as fallback" : "");
378 
379   return ret;
380 }
381 
382 static GstCaps *
gst_ttml_parse_get_src_caps(GstTtmlParse * self)383 gst_ttml_parse_get_src_caps (GstTtmlParse * self)
384 {
385   GstCaps *caps;
386   GstCapsFeatures *features = gst_caps_features_new ("meta:GstSubtitleMeta",
387       NULL);
388 
389   caps = gst_caps_new_empty_simple ("text/x-raw");
390   gst_caps_set_features (caps, 0, features);
391   return caps;
392 }
393 
394 static void
feed_textbuf(GstTtmlParse * self,GstBuffer * buf)395 feed_textbuf (GstTtmlParse * self, GstBuffer * buf)
396 {
397   gboolean discont;
398   gsize consumed;
399   gchar *input = NULL;
400   const guint8 *data;
401   gsize avail;
402 
403   discont = GST_BUFFER_IS_DISCONT (buf);
404 
405   if (GST_BUFFER_OFFSET_IS_VALID (buf) &&
406       GST_BUFFER_OFFSET (buf) != self->offset) {
407     self->offset = GST_BUFFER_OFFSET (buf);
408     discont = TRUE;
409   }
410 
411   if (discont) {
412     GST_INFO ("discontinuity");
413     /* flush the parser state */
414     g_string_truncate (self->textbuf, 0);
415     gst_adapter_clear (self->adapter);
416     /* we could set a flag to make sure that the next buffer we push out also
417      * has the DISCONT flag set, but there's no point really given that it's
418      * subtitles which are discontinuous by nature. */
419   }
420 
421   self->offset += gst_buffer_get_size (buf);
422 
423   gst_adapter_push (self->adapter, buf);
424 
425   avail = gst_adapter_available (self->adapter);
426   data = gst_adapter_map (self->adapter, avail);
427   input = convert_encoding (self, (const gchar *) data, avail, &consumed);
428 
429   if (input && consumed > 0) {
430     if (self->textbuf) {
431       g_string_free (self->textbuf, TRUE);
432       self->textbuf = NULL;
433     }
434     self->textbuf = g_string_new (input);
435     gst_adapter_unmap (self->adapter);
436     gst_adapter_flush (self->adapter, consumed);
437   } else {
438     gst_adapter_unmap (self->adapter);
439   }
440 
441   g_free (input);
442 }
443 
444 static GstFlowReturn
handle_buffer(GstTtmlParse * self,GstBuffer * buf)445 handle_buffer (GstTtmlParse * self, GstBuffer * buf)
446 {
447   GstFlowReturn ret = GST_FLOW_OK;
448   GstCaps *caps = NULL;
449   GList *subtitle_list, *subtitle;
450   GstClockTime begin = GST_BUFFER_PTS (buf);
451   GstClockTime duration = GST_BUFFER_DURATION (buf);
452 
453   if (self->first_buffer) {
454     GstMapInfo map;
455 
456     gst_buffer_map (buf, &map, GST_MAP_READ);
457     self->detected_encoding = detect_encoding ((gchar *) map.data, map.size);
458     gst_buffer_unmap (buf, &map);
459     self->first_buffer = FALSE;
460   }
461 
462   feed_textbuf (self, buf);
463 
464   if (!(caps = gst_ttml_parse_get_src_caps (self)))
465     return GST_FLOW_EOS;
466   gst_caps_unref (caps);
467 
468   /* Push newsegment if needed */
469   if (self->need_segment) {
470     GST_LOG_OBJECT (self, "pushing newsegment event with %" GST_SEGMENT_FORMAT,
471         &self->segment);
472 
473     gst_pad_push_event (self->srcpad, gst_event_new_segment (&self->segment));
474     self->need_segment = FALSE;
475   }
476 
477   subtitle_list = ttml_parse (self->textbuf->str, begin, duration);
478 
479   for (subtitle = subtitle_list; subtitle; subtitle = subtitle->next) {
480     GstBuffer *op_buffer = subtitle->data;
481     self->segment.position = GST_BUFFER_PTS (op_buffer);
482 
483     ret = gst_pad_push (self->srcpad, op_buffer);
484 
485     if (ret != GST_FLOW_OK)
486       GST_DEBUG_OBJECT (self, "flow: %s", gst_flow_get_name (ret));
487   }
488 
489   g_list_free (subtitle_list);
490   return ret;
491 }
492 
493 static GstFlowReturn
gst_ttml_parse_chain(GstPad * sinkpad,GstObject * parent,GstBuffer * buf)494 gst_ttml_parse_chain (GstPad * sinkpad, GstObject * parent, GstBuffer * buf)
495 {
496   GstTtmlParse *self = GST_TTML_PARSE (parent);
497   return handle_buffer (self, buf);
498 }
499 
500 static gboolean
gst_ttml_parse_sink_event(GstPad * pad,GstObject * parent,GstEvent * event)501 gst_ttml_parse_sink_event (GstPad * pad, GstObject * parent, GstEvent * event)
502 {
503   GstTtmlParse *self = GST_TTML_PARSE (parent);
504   gboolean ret = FALSE;
505 
506   GST_DEBUG ("Handling %s event", GST_EVENT_TYPE_NAME (event));
507 
508   switch (GST_EVENT_TYPE (event)) {
509     case GST_EVENT_SEGMENT:
510     {
511       const GstSegment *s;
512       gst_event_parse_segment (event, &s);
513       if (s->format == GST_FORMAT_TIME)
514         gst_event_copy_segment (event, &self->segment);
515       GST_DEBUG_OBJECT (self, "newsegment (%s)",
516           gst_format_get_name (self->segment.format));
517 
518       /* if not time format, we'll either start with a 0 timestamp anyway or
519        * it's following a seek in which case we'll have saved the requested
520        * seek segment and don't want to overwrite it (remember that on a seek
521        * we always just seek back to the start in BYTES format and just throw
522        * away all text that's before the requested position; if the subtitles
523        * come from an upstream demuxer, it won't be able to handle our BYTES
524        * seek request and instead send us a newsegment from the seek request
525        * it received via its video pads instead, so all is fine then too) */
526       ret = TRUE;
527       self->need_segment = TRUE;
528       gst_event_unref (event);
529       break;
530     }
531     case GST_EVENT_CAPS:
532     {
533       GstCaps *caps;
534       gst_event_unref (event);
535 
536       caps = gst_ttml_parse_get_src_caps (self);
537       event = gst_event_new_caps (caps);
538       gst_caps_unref (caps);
539 
540       ret = gst_pad_push_event (self->srcpad, event);
541       break;
542     }
543     default:
544       ret = gst_pad_event_default (pad, parent, event);
545       break;
546   }
547 
548   return ret;
549 }
550 
551 static GstStateChangeReturn
gst_ttml_parse_change_state(GstElement * element,GstStateChange transition)552 gst_ttml_parse_change_state (GstElement * element, GstStateChange transition)
553 {
554   GstStateChangeReturn ret = GST_STATE_CHANGE_SUCCESS;
555   GstTtmlParse *self = GST_TTML_PARSE (element);
556 
557   switch (transition) {
558     case GST_STATE_CHANGE_READY_TO_PAUSED:
559       /* format detection will init the parser state */
560       self->offset = 0;
561       self->valid_utf8 = TRUE;
562       self->first_buffer = TRUE;
563       g_free (self->detected_encoding);
564       self->detected_encoding = NULL;
565       g_string_truncate (self->textbuf, 0);
566       gst_adapter_clear (self->adapter);
567       break;
568     default:
569       break;
570   }
571 
572   ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
573   if (ret == GST_STATE_CHANGE_FAILURE)
574     return ret;
575 
576   switch (transition) {
577     case GST_STATE_CHANGE_PAUSED_TO_READY:
578       break;
579     default:
580       break;
581   }
582 
583   return ret;
584 }
585