1 /* GStreamer TTML subtitle parser
2  * Copyright (C) <2015> British Broadcasting Corporation
3  *   Authors:
4  *     Chris Bass <dash@rd.bbc.co.uk>
5  *     Peter Taylour <dash@rd.bbc.co.uk>
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Library General Public
9  * License as published by the Free Software Foundation; either
10  * version 2 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Library General Public License for more details.
16  *
17  * You should have received a copy of the GNU Library General Public
18  * License along with this library; if not, write to the
19  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*
24  * Parses subtitle files encoded using the EBU-TT-D profile of TTML, as defined
25  * in https://tech.ebu.ch/files/live/sites/tech/files/shared/tech/tech3380.pdf
26  * and http://www.w3.org/TR/ttaf1-dfxp/, respectively.
27  */
28 
29 #include <glib.h>
30 
31 #include <string.h>
32 #include <stdlib.h>
33 #include <stdio.h>
34 #include <math.h>
35 #include <libxml/xmlmemory.h>
36 #include <libxml/parser.h>
37 
38 #include "ttmlparse.h"
39 #include "subtitle.h"
40 #include "subtitlemeta.h"
41 
/* Default cell resolution values (X and Y). */
#define DEFAULT_CELLRES_X 32
#define DEFAULT_CELLRES_Y 15
/* Longest fontFamily attribute value (in bytes) that is accepted. */
#define MAX_FONT_FAMILY_NAME_LENGTH 128
/* Parenthesised so that the macro behaves as a single operand wherever it is
 * used (e.g. as a divisor or next to another operator). */
#define NSECONDS_IN_DAY (24 * 3600 * GST_SECOND)

/* ASCII codes of the NUL, space, tab, line-feed and carriage-return
 * characters. */
#define TTML_CHAR_NULL 0x00
#define TTML_CHAR_SPACE 0x20
#define TTML_CHAR_TAB 0x09
#define TTML_CHAR_LF 0x0A
#define TTML_CHAR_CR 0x0D
52 
/* The ttmlparse_debug category is only declared here (extern); it is also
 * selected as the default category for this file. */
GST_DEBUG_CATEGORY_EXTERN (ttmlparse_debug);
#define GST_CAT_DEFAULT ttmlparse_debug

/* Forward declarations of helpers that are referenced before their
 * definitions. */
static gchar *ttml_get_xml_property (const xmlNode * node, const char *name);
static gpointer ttml_copy_tree_element (gconstpointer src, gpointer data);

/* Short names for the structures defined below. */
typedef struct _TtmlStyleSet TtmlStyleSet;
typedef struct _TtmlElement TtmlElement;
typedef struct _TtmlScene TtmlScene;
62 
/* Kinds of element recognised by ttml_parse_element(). Each value corresponds
 * to the XML node name noted beside it. */
typedef enum
{
  TTML_ELEMENT_TYPE_STYLE,      /* "style" */
  TTML_ELEMENT_TYPE_REGION,     /* "region" */
  TTML_ELEMENT_TYPE_BODY,       /* "body" */
  TTML_ELEMENT_TYPE_DIV,        /* "div" */
  TTML_ELEMENT_TYPE_P,          /* "p" */
  TTML_ELEMENT_TYPE_SPAN,       /* "span" */
  TTML_ELEMENT_TYPE_ANON_SPAN,  /* "text", i.e. a node named "text" */
  TTML_ELEMENT_TYPE_BR          /* "br" */
} TtmlElementType;
74 
/* Whitespace handling requested by an element's "space" attribute. NONE is
 * the zero value, which a freshly allocated element keeps when the attribute
 * is absent or holds an unrecognised value. */
typedef enum
{
  TTML_WHITESPACE_MODE_NONE,
  TTML_WHITESPACE_MODE_DEFAULT,     /* space="default" */
  TTML_WHITESPACE_MODE_PRESERVE,    /* space="preserve" */
} TtmlWhitespaceMode;
81 
/* In-memory representation of one parsed XML node. All string members are
 * owned by the element and released by ttml_delete_element(). */
struct _TtmlElement
{
  TtmlElementType type;         /* kind of element, derived from the node name */
  gchar *id;                    /* value of the "id" attribute, or NULL */
  TtmlWhitespaceMode whitespace_mode;   /* from the "space" attribute */
  gchar **styles;               /* NULL-terminated list obtained by splitting the
                                 * "style" attribute on spaces, or NULL */
  gchar *region;                /* value of the "region" attribute, or NULL */
  GstClockTime begin;           /* parsed "begin" attribute, or
                                 * GST_CLOCK_TIME_NONE if absent */
  GstClockTime end;             /* parsed "end" attribute, or
                                 * GST_CLOCK_TIME_NONE if absent */
  TtmlStyleSet *style_set;      /* styling attributes of this element; may be
                                 * NULL */
  gchar *text;                  /* copy of the node content ("\n" for a br
                                 * element), or NULL */
};
94 
/* Represents a static scene consisting of one or more trees of elements that
 * should be visible over a specific period of time. */
struct _TtmlScene
{
  GstClockTime begin;           /* start of the period covered by the scene */
  GstClockTime end;             /* end of the period covered by the scene */
  GList *trees;                 /* the element trees making up the scene */
  GstBuffer *buf;               /* NOTE(review): buffer associated with the
                                 * scene; its use is not visible in this part
                                 * of the file */
};
104 
/* A set of styling attributes, stored as strings exactly as they were found
 * in the document. */
struct _TtmlStyleSet
{
  GHashTable *table;            /* attribute name -> attribute value; both are
                                 * strings owned by the table */
};
109 
110 
111 static TtmlStyleSet *
ttml_style_set_new(void)112 ttml_style_set_new (void)
113 {
114   TtmlStyleSet *ret = g_slice_new0 (TtmlStyleSet);
115   ret->table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
116   return ret;
117 }
118 
119 
120 static void
ttml_style_set_delete(TtmlStyleSet * style_set)121 ttml_style_set_delete (TtmlStyleSet * style_set)
122 {
123   if (style_set) {
124     g_hash_table_unref (style_set->table);
125     g_slice_free (TtmlStyleSet, style_set);
126   }
127 }
128 
129 
130 /* If attribute with name @attr_name already exists in @style_set, its value
131  * will be replaced by @attr_value. */
132 static gboolean
ttml_style_set_add_attr(TtmlStyleSet * style_set,const gchar * attr_name,const gchar * attr_value)133 ttml_style_set_add_attr (TtmlStyleSet * style_set, const gchar * attr_name,
134     const gchar * attr_value)
135 {
136   return g_hash_table_insert (style_set->table, g_strdup (attr_name),
137       g_strdup (attr_value));
138 }
139 
140 
141 static gboolean
ttml_style_set_contains_attr(TtmlStyleSet * style_set,const gchar * attr_name)142 ttml_style_set_contains_attr (TtmlStyleSet * style_set, const gchar * attr_name)
143 {
144   return g_hash_table_contains (style_set->table, attr_name);
145 }
146 
147 
148 static const gchar *
ttml_style_set_get_attr(TtmlStyleSet * style_set,const gchar * attr_name)149 ttml_style_set_get_attr (TtmlStyleSet * style_set, const gchar * attr_name)
150 {
151   return g_hash_table_lookup (style_set->table, attr_name);
152 }
153 
154 
155 static guint8
ttml_hex_pair_to_byte(const gchar * hex_pair)156 ttml_hex_pair_to_byte (const gchar * hex_pair)
157 {
158   gint hi_digit, lo_digit;
159 
160   hi_digit = g_ascii_xdigit_value (*hex_pair);
161   lo_digit = g_ascii_xdigit_value (*(hex_pair + 1));
162   return (hi_digit << 4) + lo_digit;
163 }
164 
165 
166 /* Color strings in EBU-TT-D can have the form "#RRBBGG" or "#RRBBGGAA". */
167 static GstSubtitleColor
ttml_parse_colorstring(const gchar * color)168 ttml_parse_colorstring (const gchar * color)
169 {
170   guint length;
171   const gchar *c = NULL;
172   GstSubtitleColor ret = { 0, 0, 0, 0 };
173 
174   if (!color)
175     return ret;
176 
177   length = strlen (color);
178   if (((length == 7) || (length == 9)) && *color == '#') {
179     c = color + 1;
180 
181     ret.r = ttml_hex_pair_to_byte (c);
182     ret.g = ttml_hex_pair_to_byte (c + 2);
183     ret.b = ttml_hex_pair_to_byte (c + 4);
184 
185     if (length == 7)
186       ret.a = G_MAXUINT8;
187     else
188       ret.a = ttml_hex_pair_to_byte (c + 6);
189 
190     GST_CAT_LOG (ttmlparse_debug, "Returning color - r:%u  b:%u  g:%u  a:%u",
191         ret.r, ret.b, ret.g, ret.a);
192   } else {
193     GST_CAT_ERROR (ttmlparse_debug, "Invalid color string: %s", color);
194   }
195 
196   return ret;
197 }
198 
199 
200 static void
ttml_style_set_print(TtmlStyleSet * style_set)201 ttml_style_set_print (TtmlStyleSet * style_set)
202 {
203   GHashTableIter iter;
204   gpointer attr_name, attr_value;
205 
206   if (!style_set) {
207     GST_CAT_LOG (ttmlparse_debug, "\t\t[NULL]");
208     return;
209   }
210 
211   g_hash_table_iter_init (&iter, style_set->table);
212   while (g_hash_table_iter_next (&iter, &attr_name, &attr_value)) {
213     GST_CAT_LOG (ttmlparse_debug, "\t\t%s: %s", (const gchar *) attr_name,
214         (const gchar *) attr_value);
215   }
216 }
217 
218 
219 static TtmlStyleSet *
ttml_parse_style_set(const xmlNode * node)220 ttml_parse_style_set (const xmlNode * node)
221 {
222   TtmlStyleSet *s;
223   gchar *value = NULL;
224   xmlAttrPtr attr;
225 
226   value = ttml_get_xml_property (node, "id");
227   if (!value) {
228     GST_CAT_ERROR (ttmlparse_debug, "styles must have an ID.");
229     return NULL;
230   }
231   g_free (value);
232 
233   s = ttml_style_set_new ();
234 
235   for (attr = node->properties; attr != NULL; attr = attr->next) {
236     if (attr->ns && ((g_strcmp0 ((const gchar *) attr->ns->prefix, "tts") == 0)
237             || (g_strcmp0 ((const gchar *) attr->ns->prefix, "itts") == 0)
238             || (g_strcmp0 ((const gchar *) attr->ns->prefix, "ebutts") == 0))) {
239       ttml_style_set_add_attr (s, (const gchar *) attr->name,
240           (const gchar *) attr->children->content);
241     }
242   }
243 
244   return s;
245 }
246 
247 
248 static void
ttml_delete_element(TtmlElement * element)249 ttml_delete_element (TtmlElement * element)
250 {
251   g_free ((gpointer) element->id);
252   if (element->styles)
253     g_strfreev (element->styles);
254   g_free ((gpointer) element->region);
255   ttml_style_set_delete (element->style_set);
256   g_free ((gpointer) element->text);
257   g_slice_free (TtmlElement, element);
258 }
259 
260 
261 static gchar *
ttml_get_xml_property(const xmlNode * node,const char * name)262 ttml_get_xml_property (const xmlNode * node, const char *name)
263 {
264   xmlChar *xml_string = NULL;
265   gchar *gst_string = NULL;
266 
267   g_return_val_if_fail (strlen (name) < 128, NULL);
268 
269   xml_string = xmlGetProp (node, (xmlChar *) name);
270   if (!xml_string)
271     return NULL;
272   gst_string = g_strdup ((gchar *) xml_string);
273   xmlFree (xml_string);
274   return gst_string;
275 }
276 
277 
278 /* EBU-TT-D timecodes have format hours:minutes:seconds[.fraction] */
279 static GstClockTime
ttml_parse_timecode(const gchar * timestring)280 ttml_parse_timecode (const gchar * timestring)
281 {
282   gchar **strings;
283   guint64 hours = 0, minutes = 0, seconds = 0, milliseconds = 0;
284   GstClockTime time = GST_CLOCK_TIME_NONE;
285 
286   GST_CAT_LOG (ttmlparse_debug, "time string: %s", timestring);
287 
288   strings = g_strsplit (timestring, ":", 3);
289   if (g_strv_length (strings) != 3U) {
290     GST_CAT_ERROR (ttmlparse_debug, "badly formatted time string: %s",
291         timestring);
292     return time;
293   }
294 
295   hours = g_ascii_strtoull (strings[0], NULL, 10U);
296   minutes = g_ascii_strtoull (strings[1], NULL, 10U);
297   if (g_strstr_len (strings[2], -1, ".")) {
298     guint n_digits;
299     gchar **substrings = g_strsplit (strings[2], ".", 2);
300     seconds = g_ascii_strtoull (substrings[0], NULL, 10U);
301     n_digits = strlen (substrings[1]);
302     milliseconds = g_ascii_strtoull (substrings[1], NULL, 10U);
303     milliseconds =
304         (guint64) (milliseconds * pow (10.0, (3 - (double) n_digits)));
305     g_strfreev (substrings);
306   } else {
307     seconds = g_ascii_strtoull (strings[2], NULL, 10U);
308   }
309 
310   if (minutes > 59 || seconds > 60) {
311     GST_CAT_ERROR (ttmlparse_debug, "invalid time string "
312         "(minutes or seconds out-of-bounds): %s\n", timestring);
313   }
314 
315   g_strfreev (strings);
316   GST_CAT_LOG (ttmlparse_debug,
317       "hours: %" G_GUINT64_FORMAT "  minutes: %" G_GUINT64_FORMAT
318       "  seconds: %" G_GUINT64_FORMAT "  milliseconds: %" G_GUINT64_FORMAT "",
319       hours, minutes, seconds, milliseconds);
320 
321   time = hours * GST_SECOND * 3600
322       + minutes * GST_SECOND * 60
323       + seconds * GST_SECOND + milliseconds * GST_MSECOND;
324 
325   return time;
326 }
327 
328 
329 static TtmlElement *
ttml_parse_element(const xmlNode * node)330 ttml_parse_element (const xmlNode * node)
331 {
332   TtmlElement *element;
333   TtmlElementType type;
334   gchar *value;
335 
336   GST_CAT_DEBUG (ttmlparse_debug, "Element name: %s",
337       (const char *) node->name);
338   if ((g_strcmp0 ((const char *) node->name, "style") == 0)) {
339     type = TTML_ELEMENT_TYPE_STYLE;
340   } else if ((g_strcmp0 ((const char *) node->name, "region") == 0)) {
341     type = TTML_ELEMENT_TYPE_REGION;
342   } else if ((g_strcmp0 ((const char *) node->name, "body") == 0)) {
343     type = TTML_ELEMENT_TYPE_BODY;
344   } else if ((g_strcmp0 ((const char *) node->name, "div") == 0)) {
345     type = TTML_ELEMENT_TYPE_DIV;
346   } else if ((g_strcmp0 ((const char *) node->name, "p") == 0)) {
347     type = TTML_ELEMENT_TYPE_P;
348   } else if ((g_strcmp0 ((const char *) node->name, "span") == 0)) {
349     type = TTML_ELEMENT_TYPE_SPAN;
350   } else if ((g_strcmp0 ((const char *) node->name, "text") == 0)) {
351     type = TTML_ELEMENT_TYPE_ANON_SPAN;
352   } else if ((g_strcmp0 ((const char *) node->name, "br") == 0)) {
353     type = TTML_ELEMENT_TYPE_BR;
354   } else {
355     return NULL;
356   }
357 
358   element = g_slice_new0 (TtmlElement);
359   element->type = type;
360 
361   if ((value = ttml_get_xml_property (node, "id"))) {
362     element->id = g_strdup (value);
363     g_free (value);
364   }
365 
366   if ((value = ttml_get_xml_property (node, "style"))) {
367     element->styles = g_strsplit (value, " ", 0);
368     GST_CAT_DEBUG (ttmlparse_debug, "%u style(s) referenced in element.",
369         g_strv_length (element->styles));
370     g_free (value);
371   }
372 
373   if (element->type == TTML_ELEMENT_TYPE_STYLE
374       || element->type == TTML_ELEMENT_TYPE_REGION) {
375     TtmlStyleSet *ss;
376     ss = ttml_parse_style_set (node);
377     if (ss)
378       element->style_set = ss;
379     else
380       GST_CAT_WARNING (ttmlparse_debug,
381           "Style or Region contains no styling attributes.");
382   }
383 
384   if ((value = ttml_get_xml_property (node, "region"))) {
385     element->region = g_strdup (value);
386     g_free (value);
387   }
388 
389   if ((value = ttml_get_xml_property (node, "begin"))) {
390     element->begin = ttml_parse_timecode (value);
391     g_free (value);
392   } else {
393     element->begin = GST_CLOCK_TIME_NONE;
394   }
395 
396   if ((value = ttml_get_xml_property (node, "end"))) {
397     element->end = ttml_parse_timecode (value);
398     g_free (value);
399   } else {
400     element->end = GST_CLOCK_TIME_NONE;
401   }
402 
403   if (node->content) {
404     GST_CAT_LOG (ttmlparse_debug, "Node content: %s", node->content);
405     element->text = g_strdup ((const gchar *) node->content);
406   }
407 
408   if (element->type == TTML_ELEMENT_TYPE_BR)
409     element->text = g_strdup ("\n");
410 
411   if ((value = ttml_get_xml_property (node, "space"))) {
412     if (g_strcmp0 (value, "preserve") == 0)
413       element->whitespace_mode = TTML_WHITESPACE_MODE_PRESERVE;
414     else if (g_strcmp0 (value, "default") == 0)
415       element->whitespace_mode = TTML_WHITESPACE_MODE_DEFAULT;
416     g_free (value);
417   }
418 
419   return element;
420 }
421 
422 
423 static GNode *
ttml_parse_body(const xmlNode * node)424 ttml_parse_body (const xmlNode * node)
425 {
426   GNode *ret;
427   TtmlElement *element;
428 
429   GST_CAT_LOG (ttmlparse_debug, "parsing node %s", node->name);
430   element = ttml_parse_element (node);
431   if (element)
432     ret = g_node_new (element);
433   else
434     return NULL;
435 
436   for (node = node->children; node != NULL; node = node->next) {
437     GNode *descendants = NULL;
438     if ((descendants = ttml_parse_body (node)))
439       g_node_append (ret, descendants);
440   }
441 
442   return ret;
443 }
444 
445 
446 /* Update the fields of a GstSubtitleStyleSet, @style_set, according to the
447  * values defined in a TtmlStyleSet, @tss, and a given cell resolution. */
448 static void
ttml_update_style_set(GstSubtitleStyleSet * style_set,TtmlStyleSet * tss,guint cellres_x,guint cellres_y)449 ttml_update_style_set (GstSubtitleStyleSet * style_set, TtmlStyleSet * tss,
450     guint cellres_x, guint cellres_y)
451 {
452   const gchar *attr;
453 
454   if ((attr = ttml_style_set_get_attr (tss, "textDirection"))) {
455     if (g_strcmp0 (attr, "rtl") == 0)
456       style_set->text_direction = GST_SUBTITLE_TEXT_DIRECTION_RTL;
457     else
458       style_set->text_direction = GST_SUBTITLE_TEXT_DIRECTION_LTR;
459   }
460 
461   if ((attr = ttml_style_set_get_attr (tss, "fontFamily"))) {
462     if (strlen (attr) <= MAX_FONT_FAMILY_NAME_LENGTH) {
463       g_free (style_set->font_family);
464       style_set->font_family = g_strdup (attr);
465     } else {
466       GST_CAT_WARNING (ttmlparse_debug,
467           "Ignoring font family name as it's overly long.");
468     }
469   }
470 
471   if ((attr = ttml_style_set_get_attr (tss, "fontSize"))) {
472     style_set->font_size = g_ascii_strtod (attr, NULL) / 100.0;
473   }
474   style_set->font_size *= (1.0 / cellres_y);
475 
476   if ((attr = ttml_style_set_get_attr (tss, "lineHeight"))) {
477     if (g_strcmp0 (attr, "normal") == 0)
478       style_set->line_height = -1;
479     else
480       style_set->line_height = g_ascii_strtod (attr, NULL) / 100.0;
481   }
482 
483   if ((attr = ttml_style_set_get_attr (tss, "textAlign"))) {
484     if (g_strcmp0 (attr, "left") == 0)
485       style_set->text_align = GST_SUBTITLE_TEXT_ALIGN_LEFT;
486     else if (g_strcmp0 (attr, "center") == 0)
487       style_set->text_align = GST_SUBTITLE_TEXT_ALIGN_CENTER;
488     else if (g_strcmp0 (attr, "right") == 0)
489       style_set->text_align = GST_SUBTITLE_TEXT_ALIGN_RIGHT;
490     else if (g_strcmp0 (attr, "end") == 0)
491       style_set->text_align = GST_SUBTITLE_TEXT_ALIGN_END;
492     else
493       style_set->text_align = GST_SUBTITLE_TEXT_ALIGN_START;
494   }
495 
496   if ((attr = ttml_style_set_get_attr (tss, "color"))) {
497     style_set->color = ttml_parse_colorstring (attr);
498   }
499 
500   if ((attr = ttml_style_set_get_attr (tss, "backgroundColor"))) {
501     style_set->background_color = ttml_parse_colorstring (attr);
502   }
503 
504   if ((attr = ttml_style_set_get_attr (tss, "fontStyle"))) {
505     if (g_strcmp0 (attr, "italic") == 0)
506       style_set->font_style = GST_SUBTITLE_FONT_STYLE_ITALIC;
507     else
508       style_set->font_style = GST_SUBTITLE_FONT_STYLE_NORMAL;
509   }
510 
511   if ((attr = ttml_style_set_get_attr (tss, "fontWeight"))) {
512     if (g_strcmp0 (attr, "bold") == 0)
513       style_set->font_weight = GST_SUBTITLE_FONT_WEIGHT_BOLD;
514     else
515       style_set->font_weight = GST_SUBTITLE_FONT_WEIGHT_NORMAL;
516   }
517 
518   if ((attr = ttml_style_set_get_attr (tss, "textDecoration"))) {
519     if (g_strcmp0 (attr, "underline") == 0)
520       style_set->text_decoration = GST_SUBTITLE_TEXT_DECORATION_UNDERLINE;
521     else
522       style_set->text_decoration = GST_SUBTITLE_TEXT_DECORATION_NONE;
523   }
524 
525   if ((attr = ttml_style_set_get_attr (tss, "unicodeBidi"))) {
526     if (g_strcmp0 (attr, "embed") == 0)
527       style_set->unicode_bidi = GST_SUBTITLE_UNICODE_BIDI_EMBED;
528     else if (g_strcmp0 (attr, "bidiOverride") == 0)
529       style_set->unicode_bidi = GST_SUBTITLE_UNICODE_BIDI_OVERRIDE;
530     else
531       style_set->unicode_bidi = GST_SUBTITLE_UNICODE_BIDI_NORMAL;
532   }
533 
534   if ((attr = ttml_style_set_get_attr (tss, "wrapOption"))) {
535     if (g_strcmp0 (attr, "noWrap") == 0)
536       style_set->wrap_option = GST_SUBTITLE_WRAPPING_OFF;
537     else
538       style_set->wrap_option = GST_SUBTITLE_WRAPPING_ON;
539   }
540 
541   if ((attr = ttml_style_set_get_attr (tss, "multiRowAlign"))) {
542     if (g_strcmp0 (attr, "start") == 0)
543       style_set->multi_row_align = GST_SUBTITLE_MULTI_ROW_ALIGN_START;
544     else if (g_strcmp0 (attr, "center") == 0)
545       style_set->multi_row_align = GST_SUBTITLE_MULTI_ROW_ALIGN_CENTER;
546     else if (g_strcmp0 (attr, "end") == 0)
547       style_set->multi_row_align = GST_SUBTITLE_MULTI_ROW_ALIGN_END;
548     else
549       style_set->multi_row_align = GST_SUBTITLE_MULTI_ROW_ALIGN_AUTO;
550   }
551 
552   if ((attr = ttml_style_set_get_attr (tss, "linePadding"))) {
553     style_set->line_padding = g_ascii_strtod (attr, NULL);
554     style_set->line_padding *= (1.0 / cellres_x);
555   }
556 
557   if ((attr = ttml_style_set_get_attr (tss, "origin"))) {
558     gchar *c;
559     style_set->origin_x = g_ascii_strtod (attr, &c) / 100.0;
560     while (!g_ascii_isdigit (*c) && *c != '+' && *c != '-')
561       ++c;
562     style_set->origin_y = g_ascii_strtod (c, NULL) / 100.0;
563   }
564 
565   if ((attr = ttml_style_set_get_attr (tss, "extent"))) {
566     gchar *c;
567     style_set->extent_w = g_ascii_strtod (attr, &c) / 100.0;
568     if ((style_set->origin_x + style_set->extent_w) > 1.0) {
569       style_set->extent_w = 1.0 - style_set->origin_x;
570     }
571     while (!g_ascii_isdigit (*c) && *c != '+' && *c != '-')
572       ++c;
573     style_set->extent_h = g_ascii_strtod (c, NULL) / 100.0;
574     if ((style_set->origin_y + style_set->extent_h) > 1.0) {
575       style_set->extent_h = 1.0 - style_set->origin_y;
576     }
577   }
578 
579   if ((attr = ttml_style_set_get_attr (tss, "displayAlign"))) {
580     if (g_strcmp0 (attr, "center") == 0)
581       style_set->display_align = GST_SUBTITLE_DISPLAY_ALIGN_CENTER;
582     else if (g_strcmp0 (attr, "after") == 0)
583       style_set->display_align = GST_SUBTITLE_DISPLAY_ALIGN_AFTER;
584     else
585       style_set->display_align = GST_SUBTITLE_DISPLAY_ALIGN_BEFORE;
586   }
587 
588   if ((attr = ttml_style_set_get_attr (tss, "padding"))) {
589     gchar **decimals;
590     guint n_decimals;
591     guint i;
592 
593     decimals = g_strsplit (attr, "%", 0);
594     n_decimals = g_strv_length (decimals) - 1;
595     for (i = 0; i < n_decimals; ++i)
596       g_strstrip (decimals[i]);
597 
598     switch (n_decimals) {
599       case 1:
600         style_set->padding_start = style_set->padding_end =
601             style_set->padding_before = style_set->padding_after =
602             g_ascii_strtod (decimals[0], NULL) / 100.0;
603         break;
604 
605       case 2:
606         style_set->padding_before = style_set->padding_after =
607             g_ascii_strtod (decimals[0], NULL) / 100.0;
608         style_set->padding_start = style_set->padding_end =
609             g_ascii_strtod (decimals[1], NULL) / 100.0;
610         break;
611 
612       case 3:
613         style_set->padding_before = g_ascii_strtod (decimals[0], NULL) / 100.0;
614         style_set->padding_start = style_set->padding_end =
615             g_ascii_strtod (decimals[1], NULL) / 100.0;
616         style_set->padding_after = g_ascii_strtod (decimals[2], NULL) / 100.0;
617         break;
618 
619       case 4:
620         style_set->padding_before = g_ascii_strtod (decimals[0], NULL) / 100.0;
621         style_set->padding_end = g_ascii_strtod (decimals[1], NULL) / 100.0;
622         style_set->padding_after = g_ascii_strtod (decimals[2], NULL) / 100.0;
623         style_set->padding_start = g_ascii_strtod (decimals[3], NULL) / 100.0;
624         break;
625     }
626     g_strfreev (decimals);
627 
628     /* Padding values in TTML files are relative to the region width & height;
629      * make them relative to the overall display width & height like all other
630      * dimensions. */
631     style_set->padding_before *= style_set->extent_h;
632     style_set->padding_after *= style_set->extent_h;
633     style_set->padding_end *= style_set->extent_w;
634     style_set->padding_start *= style_set->extent_w;
635   }
636 
637   if ((attr = ttml_style_set_get_attr (tss, "writingMode"))) {
638     if (g_str_has_prefix (attr, "rl"))
639       style_set->writing_mode = GST_SUBTITLE_WRITING_MODE_RLTB;
640     else if ((g_strcmp0 (attr, "tbrl") == 0)
641         || (g_strcmp0 (attr, "tb") == 0))
642       style_set->writing_mode = GST_SUBTITLE_WRITING_MODE_TBRL;
643     else if (g_strcmp0 (attr, "tblr") == 0)
644       style_set->writing_mode = GST_SUBTITLE_WRITING_MODE_TBLR;
645     else
646       style_set->writing_mode = GST_SUBTITLE_WRITING_MODE_LRTB;
647   }
648 
649   if ((attr = ttml_style_set_get_attr (tss, "showBackground"))) {
650     if (g_strcmp0 (attr, "whenActive") == 0)
651       style_set->show_background = GST_SUBTITLE_BACKGROUND_MODE_WHEN_ACTIVE;
652     else
653       style_set->show_background = GST_SUBTITLE_BACKGROUND_MODE_ALWAYS;
654   }
655 
656   if ((attr = ttml_style_set_get_attr (tss, "overflow"))) {
657     if (g_strcmp0 (attr, "visible") == 0)
658       style_set->overflow = GST_SUBTITLE_OVERFLOW_MODE_VISIBLE;
659     else
660       style_set->overflow = GST_SUBTITLE_OVERFLOW_MODE_HIDDEN;
661   }
662 
663   if ((attr = ttml_style_set_get_attr (tss, "fillLineGap"))) {
664     if (g_strcmp0 (attr, "true") == 0)
665       style_set->fill_line_gap = TRUE;
666   }
667 }
668 
669 
670 static TtmlStyleSet *
ttml_style_set_copy(TtmlStyleSet * style_set)671 ttml_style_set_copy (TtmlStyleSet * style_set)
672 {
673   GHashTableIter iter;
674   gpointer attr_name, attr_value;
675   TtmlStyleSet *ret = ttml_style_set_new ();
676 
677   g_hash_table_iter_init (&iter, style_set->table);
678   while (g_hash_table_iter_next (&iter, &attr_name, &attr_value)) {
679     ttml_style_set_add_attr (ret, (const gchar *) attr_name,
680         (const gchar *) attr_value);
681   }
682 
683   return ret;
684 }
685 
686 
687 /* set2 overrides set1. Unlike style inheritance, merging will result in all
688  * values from set1 being merged into set2. */
689 static TtmlStyleSet *
ttml_style_set_merge(TtmlStyleSet * set1,TtmlStyleSet * set2)690 ttml_style_set_merge (TtmlStyleSet * set1, TtmlStyleSet * set2)
691 {
692   TtmlStyleSet *ret = NULL;
693 
694   if (set1) {
695     ret = ttml_style_set_copy (set1);
696 
697     if (set2) {
698       GHashTableIter iter;
699       gpointer attr_name, attr_value;
700 
701       g_hash_table_iter_init (&iter, set2->table);
702       while (g_hash_table_iter_next (&iter, &attr_name, &attr_value)) {
703         ttml_style_set_add_attr (ret, (const gchar *) attr_name,
704             (const gchar *) attr_value);
705       }
706     }
707   } else if (set2) {
708     ret = ttml_style_set_copy (set2);
709   }
710 
711   return ret;
712 }
713 
714 
715 static gchar *
ttml_get_relative_font_size(const gchar * parent_size,const gchar * child_size)716 ttml_get_relative_font_size (const gchar * parent_size,
717     const gchar * child_size)
718 {
719   guint psize = (guint) g_ascii_strtoull (parent_size, NULL, 10U);
720   guint csize = (guint) g_ascii_strtoull (child_size, NULL, 10U);
721   csize = (csize * psize) / 100U;
722   return g_strdup_printf ("%u%%", csize);
723 }
724 
725 
726 static TtmlStyleSet *
ttml_style_set_inherit(TtmlStyleSet * parent,TtmlStyleSet * child)727 ttml_style_set_inherit (TtmlStyleSet * parent, TtmlStyleSet * child)
728 {
729   TtmlStyleSet *ret = NULL;
730   GHashTableIter iter;
731   gpointer attr_name, attr_value;
732 
733   if (child) {
734     ret = ttml_style_set_copy (child);
735   } else {
736     ret = ttml_style_set_new ();
737   }
738 
739   if (!parent)
740     return ret;
741 
742   g_hash_table_iter_init (&iter, parent->table);
743   while (g_hash_table_iter_next (&iter, &attr_name, &attr_value)) {
744     /* In TTML, if an element which has a defined fontSize is the child of an
745      * element that also has a defined fontSize, the child's font size is
746      * relative to that of its parent. If its parent doesn't have a defined
747      * fontSize, then the child's fontSize is relative to the document's cell
748      * size. Therefore, if the former is true, we calculate the value of
749      * fontSize based on the parent's fontSize; otherwise, we simply keep
750      * the value defined in the child's style set. */
751     if (g_strcmp0 ((const gchar *) attr_name, "fontSize") == 0
752         && ttml_style_set_contains_attr (ret, "fontSize")) {
753       const gchar *original_child_font_size =
754           ttml_style_set_get_attr (ret, "fontSize");
755       gchar *scaled_child_font_size =
756           ttml_get_relative_font_size ((const gchar *) attr_value,
757           original_child_font_size);
758       GST_CAT_LOG (ttmlparse_debug, "Calculated font size: %s",
759           scaled_child_font_size);
760       ttml_style_set_add_attr (ret, (const gchar *) attr_name,
761           scaled_child_font_size);
762       g_free (scaled_child_font_size);
763     }
764 
765     /* Not all styling attributes are inherited in TTML. */
766     if (g_strcmp0 ((const gchar *) attr_name, "backgroundColor") != 0
767         && g_strcmp0 ((const gchar *) attr_name, "origin") != 0
768         && g_strcmp0 ((const gchar *) attr_name, "extent") != 0
769         && g_strcmp0 ((const gchar *) attr_name, "displayAlign") != 0
770         && g_strcmp0 ((const gchar *) attr_name, "overflow") != 0
771         && g_strcmp0 ((const gchar *) attr_name, "padding") != 0
772         && g_strcmp0 ((const gchar *) attr_name, "writingMode") != 0
773         && g_strcmp0 ((const gchar *) attr_name, "showBackground") != 0
774         && g_strcmp0 ((const gchar *) attr_name, "unicodeBidi") != 0) {
775       if (!ttml_style_set_contains_attr (ret, (const gchar *) attr_name)) {
776         ttml_style_set_add_attr (ret, (const gchar *) attr_name,
777             (const gchar *) attr_value);
778       }
779     }
780   }
781 
782   return ret;
783 }
784 
785 
786 /*
787  * Returns TRUE iff @element1 and @element2 reference the same set of styles.
788  * If neither @element1 nor @element2 reference any styles, they are considered
789  * to have matching styling and, hence, TRUE is returned.
790  */
791 static gboolean
ttml_element_styles_match(TtmlElement * element1,TtmlElement * element2)792 ttml_element_styles_match (TtmlElement * element1, TtmlElement * element2)
793 {
794   const gchar *const *strv;
795   gint i;
796 
797   if (!element1 || !element2 || (!element1->styles && element2->styles) ||
798       (element1->styles && !element2->styles))
799     return FALSE;
800 
801   if (!element1->styles && !element2->styles)
802     return TRUE;
803 
804   strv = (const gchar * const *) element2->styles;
805 
806   if (g_strv_length (element1->styles) != g_strv_length (element2->styles))
807     return FALSE;
808 
809   for (i = 0; i < g_strv_length (element1->styles); ++i) {
810     if (!g_strv_contains (strv, element1->styles[i]))
811       return FALSE;
812   }
813 
814   return TRUE;
815 }
816 
817 
818 static gchar *
ttml_get_element_type_string(TtmlElement * element)819 ttml_get_element_type_string (TtmlElement * element)
820 {
821   switch (element->type) {
822     case TTML_ELEMENT_TYPE_STYLE:
823       return g_strdup ("<style>");
824       break;
825     case TTML_ELEMENT_TYPE_REGION:
826       return g_strdup ("<region>");
827       break;
828     case TTML_ELEMENT_TYPE_BODY:
829       return g_strdup ("<body>");
830       break;
831     case TTML_ELEMENT_TYPE_DIV:
832       return g_strdup ("<div>");
833       break;
834     case TTML_ELEMENT_TYPE_P:
835       return g_strdup ("<p>");
836       break;
837     case TTML_ELEMENT_TYPE_SPAN:
838       return g_strdup ("<span>");
839       break;
840     case TTML_ELEMENT_TYPE_ANON_SPAN:
841       return g_strdup ("<anon-span>");
842       break;
843     case TTML_ELEMENT_TYPE_BR:
844       return g_strdup ("<br>");
845       break;
846     default:
847       return g_strdup ("Unknown");
848       break;
849   }
850 }
851 
852 
853 /* Merge styles referenced by an element. */
854 static gboolean
ttml_resolve_styles(GNode * node,gpointer data)855 ttml_resolve_styles (GNode * node, gpointer data)
856 {
857   TtmlStyleSet *tmp = NULL;
858   TtmlElement *element, *style;
859   GHashTable *styles_table;
860   gchar *type_string;
861   guint i;
862 
863   styles_table = (GHashTable *) data;
864   element = node->data;
865 
866   type_string = ttml_get_element_type_string (element);
867   GST_CAT_LOG (ttmlparse_debug, "Element type: %s", type_string);
868   g_free (type_string);
869 
870   if (!element->styles)
871     return FALSE;
872 
873   for (i = 0; i < g_strv_length (element->styles); ++i) {
874     tmp = element->style_set;
875     style = g_hash_table_lookup (styles_table, element->styles[i]);
876     if (style) {
877       GST_CAT_LOG (ttmlparse_debug, "Merging style %s...", element->styles[i]);
878       element->style_set = ttml_style_set_merge (element->style_set,
879           style->style_set);
880       ttml_style_set_delete (tmp);
881     } else {
882       GST_CAT_WARNING (ttmlparse_debug,
883           "Element references an unknown style (%s)", element->styles[i]);
884     }
885   }
886 
887   GST_CAT_LOG (ttmlparse_debug, "Style set after merging:");
888   ttml_style_set_print (element->style_set);
889 
890   return FALSE;
891 }
892 
893 
894 static void
ttml_resolve_referenced_styles(GList * trees,GHashTable * styles_table)895 ttml_resolve_referenced_styles (GList * trees, GHashTable * styles_table)
896 {
897   GList *tree;
898 
899   for (tree = g_list_first (trees); tree; tree = tree->next) {
900     GNode *root = (GNode *) tree->data;
901     g_node_traverse (root, G_PRE_ORDER, G_TRAVERSE_ALL, -1, ttml_resolve_styles,
902         styles_table);
903   }
904 }
905 
906 
907 /* Inherit styling attributes from parent. */
908 static gboolean
ttml_inherit_styles(GNode * node,gpointer data)909 ttml_inherit_styles (GNode * node, gpointer data)
910 {
911   TtmlStyleSet *tmp = NULL;
912   TtmlElement *element, *parent;
913   gchar *type_string;
914 
915   element = node->data;
916 
917   type_string = ttml_get_element_type_string (element);
918   GST_CAT_LOG (ttmlparse_debug, "Element type: %s", type_string);
919   g_free (type_string);
920 
921   if (node->parent) {
922     parent = node->parent->data;
923     if (parent->style_set) {
924       tmp = element->style_set;
925       if (element->type == TTML_ELEMENT_TYPE_ANON_SPAN ||
926           element->type == TTML_ELEMENT_TYPE_BR) {
927         element->style_set = ttml_style_set_merge (parent->style_set,
928             element->style_set);
929         element->styles = g_strdupv (parent->styles);
930       } else {
931         element->style_set = ttml_style_set_inherit (parent->style_set,
932             element->style_set);
933       }
934       ttml_style_set_delete (tmp);
935     }
936   }
937 
938   GST_CAT_LOG (ttmlparse_debug, "Style set after inheriting:");
939   ttml_style_set_print (element->style_set);
940 
941   return FALSE;
942 }
943 
944 
945 static void
ttml_inherit_element_styles(GList * trees)946 ttml_inherit_element_styles (GList * trees)
947 {
948   GList *tree;
949 
950   for (tree = g_list_first (trees); tree; tree = tree->next) {
951     GNode *root = (GNode *) tree->data;
952     g_node_traverse (root, G_PRE_ORDER, G_TRAVERSE_ALL, -1, ttml_inherit_styles,
953         NULL);
954   }
955 }
956 
957 
958 /* If whitespace_mode isn't explicitly set for this element, inherit from its
959  * parent. If this element is the root of the tree, set whitespace_mode to
960  * that of the overall document. */
961 static gboolean
ttml_inherit_element_whitespace_mode(GNode * node,gpointer data)962 ttml_inherit_element_whitespace_mode (GNode * node, gpointer data)
963 {
964   TtmlWhitespaceMode *doc_mode = (TtmlWhitespaceMode *) data;
965   TtmlElement *element = node->data;
966   TtmlElement *parent;
967 
968   if (element->whitespace_mode != TTML_WHITESPACE_MODE_NONE)
969     return FALSE;
970 
971   if (G_NODE_IS_ROOT (node)) {
972     element->whitespace_mode = *doc_mode;
973     return FALSE;
974   }
975 
976   parent = node->parent->data;
977   element->whitespace_mode = parent->whitespace_mode;
978   return FALSE;
979 }
980 
981 
982 static void
ttml_inherit_whitespace_mode(GNode * tree,TtmlWhitespaceMode doc_mode)983 ttml_inherit_whitespace_mode (GNode * tree, TtmlWhitespaceMode doc_mode)
984 {
985   g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
986       ttml_inherit_element_whitespace_mode, &doc_mode);
987 }
988 
989 
990 static gboolean
ttml_free_node_data(GNode * node,gpointer data)991 ttml_free_node_data (GNode * node, gpointer data)
992 {
993   TtmlElement *element = node->data;
994   ttml_delete_element (element);
995   return FALSE;
996 }
997 
998 
999 static void
ttml_delete_tree(GNode * tree)1000 ttml_delete_tree (GNode * tree)
1001 {
1002   g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_ALL, -1, ttml_free_node_data,
1003       NULL);
1004   g_node_destroy (tree);
1005 }
1006 
1007 
/* Time window handed to ttml_clip_element_period(): timed elements lying
 * wholly outside it are removed and the rest are clamped to it. */
typedef struct
{
  GstClockTime begin;           /* Start of the window. */
  GstClockTime end;             /* End of the window. */
} ClipWindow;
1013 
1014 static gboolean
ttml_clip_element_period(GNode * node,gpointer data)1015 ttml_clip_element_period (GNode * node, gpointer data)
1016 {
1017   TtmlElement *element = node->data;
1018   ClipWindow *window = data;
1019 
1020   if (!GST_CLOCK_TIME_IS_VALID (element->begin)) {
1021     return FALSE;
1022   }
1023 
1024   if (element->begin > window->end || element->end < window->begin) {
1025     ttml_delete_tree (node);
1026     node = NULL;
1027     return FALSE;
1028   }
1029 
1030   element->begin = MAX (element->begin, window->begin);
1031   element->end = MIN (element->end, window->end);
1032   return FALSE;
1033 }
1034 
1035 
1036 static void
ttml_apply_time_window(GNode * tree,GstClockTime window_begin,GstClockTime window_end)1037 ttml_apply_time_window (GNode * tree, GstClockTime window_begin,
1038     GstClockTime window_end)
1039 {
1040   ClipWindow window;
1041   window.begin = window_begin;
1042   window.end = window_end;
1043 
1044   g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
1045       ttml_clip_element_period, &window);
1046 }
1047 
1048 
1049 static gboolean
ttml_resolve_element_timings(GNode * node,gpointer data)1050 ttml_resolve_element_timings (GNode * node, gpointer data)
1051 {
1052   TtmlElement *element, *leaf;
1053 
1054   leaf = element = node->data;
1055 
1056   if (GST_CLOCK_TIME_IS_VALID (leaf->begin)
1057       && GST_CLOCK_TIME_IS_VALID (leaf->end)) {
1058     GST_CAT_LOG (ttmlparse_debug, "Leaf node already has timing.");
1059     return FALSE;
1060   }
1061 
1062   /* Inherit timings from ancestor. */
1063   while (node->parent && !GST_CLOCK_TIME_IS_VALID (element->begin)) {
1064     node = node->parent;
1065     element = node->data;
1066   }
1067 
1068   if (!GST_CLOCK_TIME_IS_VALID (element->begin)) {
1069     GST_CAT_WARNING (ttmlparse_debug,
1070         "No timing found for element; setting to Root Temporal Extent.");
1071     leaf->begin = 0;
1072     leaf->end = NSECONDS_IN_DAY;
1073   } else {
1074     leaf->begin = element->begin;
1075     leaf->end = element->end;
1076     GST_CAT_LOG (ttmlparse_debug, "Leaf begin: %" GST_TIME_FORMAT,
1077         GST_TIME_ARGS (leaf->begin));
1078     GST_CAT_LOG (ttmlparse_debug, "Leaf end: %" GST_TIME_FORMAT,
1079         GST_TIME_ARGS (leaf->end));
1080   }
1081 
1082   return FALSE;
1083 }
1084 
1085 
1086 static void
ttml_resolve_timings(GNode * tree)1087 ttml_resolve_timings (GNode * tree)
1088 {
1089   g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_LEAVES, -1,
1090       ttml_resolve_element_timings, NULL);
1091 }
1092 
1093 
1094 static gboolean
ttml_resolve_leaf_region(GNode * node,gpointer data)1095 ttml_resolve_leaf_region (GNode * node, gpointer data)
1096 {
1097   TtmlElement *element, *leaf;
1098   leaf = element = node->data;
1099 
1100   while (node->parent && !element->region) {
1101     node = node->parent;
1102     element = node->data;
1103   }
1104 
1105   if (element->region) {
1106     leaf->region = g_strdup (element->region);
1107     GST_CAT_LOG (ttmlparse_debug, "Leaf region: %s", leaf->region);
1108   } else {
1109     GST_CAT_WARNING (ttmlparse_debug, "No region found above leaf element.");
1110   }
1111 
1112   return FALSE;
1113 }
1114 
1115 
1116 static void
ttml_resolve_regions(GNode * tree)1117 ttml_resolve_regions (GNode * tree)
1118 {
1119   g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_LEAVES, -1,
1120       ttml_resolve_leaf_region, NULL);
1121 }
1122 
1123 
/* State shared between ttml_find_next_transition() and its traversal
 * callback, ttml_update_transition_time(). */
typedef struct
{
  /* Only transitions later than this time are considered. When it is
   * invalid, any element begin time is accepted. */
  GstClockTime start_time;
  /* Earliest qualifying element begin or end time seen so far. */
  GstClockTime next_transition_time;
} TrState;
1129 
1130 
1131 static gboolean
ttml_update_transition_time(GNode * node,gpointer data)1132 ttml_update_transition_time (GNode * node, gpointer data)
1133 {
1134   TtmlElement *element = node->data;
1135   TrState *state = (TrState *) data;
1136 
1137   if ((element->begin < state->next_transition_time)
1138       && (!GST_CLOCK_TIME_IS_VALID (state->start_time)
1139           || (element->begin > state->start_time))) {
1140     state->next_transition_time = element->begin;
1141     GST_CAT_LOG (ttmlparse_debug,
1142         "Updating next transition time to element begin time (%"
1143         GST_TIME_FORMAT ")", GST_TIME_ARGS (state->next_transition_time));
1144     return FALSE;
1145   }
1146 
1147   if ((element->end < state->next_transition_time)
1148       && (element->end > state->start_time)) {
1149     state->next_transition_time = element->end;
1150     GST_CAT_LOG (ttmlparse_debug,
1151         "Updating next transition time to element end time (%"
1152         GST_TIME_FORMAT ")", GST_TIME_ARGS (state->next_transition_time));
1153   }
1154 
1155   return FALSE;
1156 }
1157 
1158 
1159 /* Return details about the next transition after @time. */
1160 static GstClockTime
ttml_find_next_transition(GList * trees,GstClockTime time)1161 ttml_find_next_transition (GList * trees, GstClockTime time)
1162 {
1163   TrState state;
1164   state.start_time = time;
1165   state.next_transition_time = GST_CLOCK_TIME_NONE;
1166 
1167   for (trees = g_list_first (trees); trees; trees = trees->next) {
1168     GNode *tree = (GNode *) trees->data;
1169     g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
1170         ttml_update_transition_time, &state);
1171   }
1172 
1173   GST_CAT_LOG (ttmlparse_debug, "Next transition is at %" GST_TIME_FORMAT,
1174       GST_TIME_ARGS (state.next_transition_time));
1175 
1176   return state.next_transition_time;
1177 }
1178 
1179 
1180 /* Remove nodes from tree that are not visible at @time. */
1181 static GNode *
ttml_remove_nodes_by_time(GNode * node,GstClockTime time)1182 ttml_remove_nodes_by_time (GNode * node, GstClockTime time)
1183 {
1184   GNode *child, *next_child;
1185   TtmlElement *element;
1186   element = node->data;
1187 
1188   child = node->children;
1189   next_child = child ? child->next : NULL;
1190   while (child) {
1191     ttml_remove_nodes_by_time (child, time);
1192     child = next_child;
1193     next_child = child ? child->next : NULL;
1194   }
1195 
1196   if (!node->children && ((element->begin > time) || (element->end <= time))) {
1197     ttml_delete_tree (node);
1198     node = NULL;
1199   }
1200 
1201   return node;
1202 }
1203 
1204 
1205 /* Return a list of trees containing the elements and their ancestors that are
1206  * visible at @time. */
1207 static GList *
ttml_get_active_trees(GList * element_trees,GstClockTime time)1208 ttml_get_active_trees (GList * element_trees, GstClockTime time)
1209 {
1210   GList *tree;
1211   GList *ret = NULL;
1212 
1213   for (tree = g_list_first (element_trees); tree; tree = tree->next) {
1214     GNode *root = g_node_copy_deep ((GNode *) tree->data,
1215         ttml_copy_tree_element, NULL);
1216     GST_CAT_LOG (ttmlparse_debug, "There are %u nodes in tree.",
1217         g_node_n_nodes (root, G_TRAVERSE_ALL));
1218     root = ttml_remove_nodes_by_time (root, time);
1219     if (root) {
1220       GST_CAT_LOG (ttmlparse_debug,
1221           "After filtering there are %u nodes in tree.", g_node_n_nodes (root,
1222               G_TRAVERSE_ALL));
1223 
1224       ret = g_list_append (ret, root);
1225     } else {
1226       GST_CAT_LOG (ttmlparse_debug,
1227           "All elements have been filtered from tree.");
1228     }
1229   }
1230 
1231   GST_CAT_DEBUG (ttmlparse_debug, "There are %u trees in returned list.",
1232       g_list_length (ret));
1233   return ret;
1234 }
1235 
1236 
1237 static GList *
ttml_create_scenes(GList * region_trees)1238 ttml_create_scenes (GList * region_trees)
1239 {
1240   TtmlScene *cur_scene = NULL;
1241   GList *output_scenes = NULL;
1242   GList *active_trees = NULL;
1243   GstClockTime timestamp = GST_CLOCK_TIME_NONE;
1244 
1245   while ((timestamp = ttml_find_next_transition (region_trees, timestamp))
1246       != GST_CLOCK_TIME_NONE) {
1247     GST_CAT_LOG (ttmlparse_debug,
1248         "Next transition found at time %" GST_TIME_FORMAT,
1249         GST_TIME_ARGS (timestamp));
1250     if (cur_scene) {
1251       cur_scene->end = timestamp;
1252       output_scenes = g_list_append (output_scenes, cur_scene);
1253     }
1254 
1255     active_trees = ttml_get_active_trees (region_trees, timestamp);
1256     GST_CAT_LOG (ttmlparse_debug, "There will be %u active regions after "
1257         "transition", g_list_length (active_trees));
1258 
1259     if (active_trees) {
1260       cur_scene = g_slice_new0 (TtmlScene);
1261       cur_scene->begin = timestamp;
1262       cur_scene->trees = active_trees;
1263     } else {
1264       cur_scene = NULL;
1265     }
1266   }
1267 
1268   return output_scenes;
1269 }
1270 
1271 
1272 /* Handle element whitespace in accordance with section 7.2.3 of the TTML
1273  * specification. Specifically, this function implements the
1274  * white-space-collapse="true" and linefeed-treatment="treat-as-space"
1275  * behaviours. Note that stripping of whitespace at the start and end of line
1276  * areas (suppress-at-line-break="auto" and
1277  * white-space-treatment="ignore-if-surrounding-linefeed" behaviours) can only
1278  * be done by the renderer once the text from multiple elements has been laid
1279  * out in line areas. */
1280 static gboolean
ttml_handle_element_whitespace(GNode * node,gpointer data)1281 ttml_handle_element_whitespace (GNode * node, gpointer data)
1282 {
1283   TtmlElement *element = node->data;
1284   guint space_count = 0;
1285   guint textlen;
1286   gchar *c;
1287 
1288   if (!element->text || (element->type == TTML_ELEMENT_TYPE_BR) ||
1289       (element->whitespace_mode == TTML_WHITESPACE_MODE_PRESERVE)) {
1290     return FALSE;
1291   }
1292 
1293   textlen = strlen (element->text);
1294   for (c = element->text; TRUE; c = g_utf8_next_char (c)) {
1295 
1296     gchar buf[6] = { 0 };
1297     gunichar u = g_utf8_get_char (c);
1298     gint nbytes = g_unichar_to_utf8 (u, buf);
1299 
1300     /* Repace each newline or tab with a space. */
1301     if (nbytes == 1 && (buf[0] == TTML_CHAR_LF || buf[0] == TTML_CHAR_TAB)) {
1302       *c = ' ';
1303       buf[0] = TTML_CHAR_SPACE;
1304     }
1305 
1306     /* Collapse runs of whitespace. */
1307     if (nbytes == 1 && (buf[0] == TTML_CHAR_SPACE || buf[0] == TTML_CHAR_CR)) {
1308       ++space_count;
1309     } else {
1310       if (space_count > 1) {
1311         gchar *new_head = c - space_count + 1;
1312         g_strlcpy (new_head, c, textlen);
1313         c = new_head;
1314       }
1315       space_count = 0;
1316       if (nbytes == 1 && buf[0] == TTML_CHAR_NULL)
1317         break;
1318     }
1319   }
1320 
1321   return FALSE;
1322 }
1323 
1324 
1325 static void
ttml_handle_whitespace(GNode * tree)1326 ttml_handle_whitespace (GNode * tree)
1327 {
1328   g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_LEAVES, -1,
1329       ttml_handle_element_whitespace, NULL);
1330 }
1331 
1332 
1333 static GNode *
ttml_filter_content_nodes(GNode * node)1334 ttml_filter_content_nodes (GNode * node)
1335 {
1336   GNode *child, *next_child;
1337   TtmlElement *element = node->data;
1338   TtmlElement *parent = node->parent ? node->parent->data : NULL;
1339 
1340   child = node->children;
1341   next_child = child ? child->next : NULL;
1342   while (child) {
1343     ttml_filter_content_nodes (child);
1344     child = next_child;
1345     next_child = child ? child->next : NULL;
1346   }
1347 
1348   /* Only text content in <p>s and <span>s is significant. */
1349   if (element->type == TTML_ELEMENT_TYPE_ANON_SPAN
1350       && parent->type != TTML_ELEMENT_TYPE_P
1351       && parent->type != TTML_ELEMENT_TYPE_SPAN) {
1352     ttml_delete_element (element);
1353     g_node_destroy (node);
1354     node = NULL;
1355   }
1356 
1357   return node;
1358 }
1359 
1360 
1361 /* Store in @table child elements of @node with name @element_name. A child
1362  * element with the same ID as an existing entry in @table will overwrite the
1363  * existing entry. */
1364 static void
ttml_store_unique_children(xmlNodePtr node,const gchar * element_name,GHashTable * table)1365 ttml_store_unique_children (xmlNodePtr node, const gchar * element_name,
1366     GHashTable * table)
1367 {
1368   xmlNodePtr ptr;
1369 
1370   for (ptr = node->children; ptr; ptr = ptr->next) {
1371     if (xmlStrcmp (ptr->name, (const xmlChar *) element_name) == 0) {
1372       TtmlElement *element = ttml_parse_element (ptr);
1373       gboolean new_key;
1374 
1375       if (element) {
1376         new_key = g_hash_table_insert (table, g_strdup (element->id), element);
1377         if (!new_key)
1378           GST_CAT_WARNING (ttmlparse_debug,
1379               "Document contains two %s elements with the same ID (\"%s\").",
1380               element_name, element->id);
1381       }
1382     }
1383   }
1384 }
1385 
1386 
1387 /* Parse style and region elements from @head and store in their respective
1388  * hash tables for future reference. */
1389 static void
ttml_parse_head(xmlNodePtr head,GHashTable * styles_table,GHashTable * regions_table)1390 ttml_parse_head (xmlNodePtr head, GHashTable * styles_table,
1391     GHashTable * regions_table)
1392 {
1393   xmlNodePtr node;
1394 
1395   for (node = head->children; node; node = node->next) {
1396     if (xmlStrcmp (node->name, (const xmlChar *) "styling") == 0)
1397       ttml_store_unique_children (node, "style", styles_table);
1398     if (xmlStrcmp (node->name, (const xmlChar *) "layout") == 0)
1399       ttml_store_unique_children (node, "region", regions_table);
1400   }
1401 }
1402 
1403 
1404 /* Remove nodes that do not belong to @region, or are not an ancestor of a node
1405  * belonging to @region. */
1406 static GNode *
ttml_remove_nodes_by_region(GNode * node,const gchar * region)1407 ttml_remove_nodes_by_region (GNode * node, const gchar * region)
1408 {
1409   GNode *child, *next_child;
1410   TtmlElement *element;
1411   element = node->data;
1412 
1413   child = node->children;
1414   next_child = child ? child->next : NULL;
1415   while (child) {
1416     ttml_remove_nodes_by_region (child, region);
1417     child = next_child;
1418     next_child = child ? child->next : NULL;
1419   }
1420 
1421   if ((element->type == TTML_ELEMENT_TYPE_ANON_SPAN
1422           || element->type != TTML_ELEMENT_TYPE_BR)
1423       && element->region && (g_strcmp0 (element->region, region) != 0)) {
1424     ttml_delete_element (element);
1425     g_node_destroy (node);
1426     return NULL;
1427   }
1428   if (element->type != TTML_ELEMENT_TYPE_ANON_SPAN
1429       && element->type != TTML_ELEMENT_TYPE_BR && !node->children) {
1430     ttml_delete_element (element);
1431     g_node_destroy (node);
1432     return NULL;
1433   }
1434 
1435   return node;
1436 }
1437 
1438 
1439 static TtmlElement *
ttml_copy_element(const TtmlElement * element)1440 ttml_copy_element (const TtmlElement * element)
1441 {
1442   TtmlElement *ret = g_slice_new0 (TtmlElement);
1443 
1444   ret->type = element->type;
1445   if (element->id)
1446     ret->id = g_strdup (element->id);
1447   ret->whitespace_mode = element->whitespace_mode;
1448   if (element->styles)
1449     ret->styles = g_strdupv (element->styles);
1450   if (element->region)
1451     ret->region = g_strdup (element->region);
1452   ret->begin = element->begin;
1453   ret->end = element->end;
1454   if (element->style_set)
1455     ret->style_set = ttml_style_set_copy (element->style_set);
1456   if (element->text)
1457     ret->text = g_strdup (element->text);
1458 
1459   return ret;
1460 }
1461 
1462 
1463 static gpointer
ttml_copy_tree_element(gconstpointer src,gpointer data)1464 ttml_copy_tree_element (gconstpointer src, gpointer data)
1465 {
1466   return ttml_copy_element ((TtmlElement *) src);
1467 }
1468 
1469 
1470 /* Split the body tree into a set of trees, each containing only the elements
1471  * belonging to a single region. Returns a list of trees, one per region, each
1472  * with the corresponding region element at its root. */
1473 static GList *
ttml_split_body_by_region(GNode * body,GHashTable * regions)1474 ttml_split_body_by_region (GNode * body, GHashTable * regions)
1475 {
1476   GHashTableIter iter;
1477   gpointer key, value;
1478   GList *ret = NULL;
1479 
1480   g_hash_table_iter_init (&iter, regions);
1481   while (g_hash_table_iter_next (&iter, &key, &value)) {
1482     gchar *region_name = (gchar *) key;
1483     TtmlElement *region = (TtmlElement *) value;
1484     GNode *region_node = g_node_new (ttml_copy_element (region));
1485     GNode *body_copy = g_node_copy_deep (body, ttml_copy_tree_element, NULL);
1486 
1487     GST_CAT_DEBUG (ttmlparse_debug, "Creating tree for region %s", region_name);
1488     GST_CAT_LOG (ttmlparse_debug, "Copy of body has %u nodes.",
1489         g_node_n_nodes (body_copy, G_TRAVERSE_ALL));
1490 
1491     body_copy = ttml_remove_nodes_by_region (body_copy, region_name);
1492     if (body_copy) {
1493       GST_CAT_LOG (ttmlparse_debug, "Copy of body now has %u nodes.",
1494           g_node_n_nodes (body_copy, G_TRAVERSE_ALL));
1495 
1496       /* Reparent tree to region node. */
1497       g_node_prepend (region_node, body_copy);
1498     }
1499     GST_CAT_LOG (ttmlparse_debug, "Final tree has %u nodes.",
1500         g_node_n_nodes (region_node, G_TRAVERSE_ALL));
1501     ret = g_list_append (ret, region_node);
1502   }
1503 
1504   GST_CAT_DEBUG (ttmlparse_debug, "Returning %u trees.", g_list_length (ret));
1505   return ret;
1506 }
1507 
1508 
1509 static gint
ttml_add_text_to_buffer(GstBuffer * buf,const gchar * text)1510 ttml_add_text_to_buffer (GstBuffer * buf, const gchar * text)
1511 {
1512   GstMemory *mem;
1513   GstMapInfo map;
1514   guint ret;
1515 
1516   if (gst_buffer_n_memory (buf) == gst_buffer_get_max_memory ())
1517     return -1;
1518 
1519   mem = gst_allocator_alloc (NULL, strlen (text) + 1, NULL);
1520   if (!gst_memory_map (mem, &map, GST_MAP_WRITE))
1521     GST_CAT_ERROR (ttmlparse_debug, "Failed to map memory.");
1522 
1523   g_strlcpy ((gchar *) map.data, text, map.size);
1524   GST_CAT_DEBUG (ttmlparse_debug, "Inserted following text into buffer: \"%s\"",
1525       (gchar *) map.data);
1526   gst_memory_unmap (mem, &map);
1527 
1528   ret = gst_buffer_n_memory (buf);
1529   gst_buffer_insert_memory (buf, -1, mem);
1530   return ret;
1531 }
1532 
1533 
1534 /* Create a GstSubtitleElement from @element, add it to @block, and insert its
1535  * associated text in @buf. */
1536 static gboolean
ttml_add_element(GstSubtitleBlock * block,TtmlElement * element,GstBuffer * buf,guint cellres_x,guint cellres_y)1537 ttml_add_element (GstSubtitleBlock * block, TtmlElement * element,
1538     GstBuffer * buf, guint cellres_x, guint cellres_y)
1539 {
1540   GstSubtitleStyleSet *element_style = NULL;
1541   guint buffer_index;
1542   GstSubtitleElement *sub_element = NULL;
1543 
1544   buffer_index = ttml_add_text_to_buffer (buf, element->text);
1545   if (buffer_index == -1) {
1546     GST_CAT_WARNING (ttmlparse_debug,
1547         "Reached maximum element count for buffer - discarding element.");
1548     return FALSE;
1549   }
1550 
1551   GST_CAT_DEBUG (ttmlparse_debug, "Inserted text at index %u in GstBuffer.",
1552       buffer_index);
1553 
1554   element_style = gst_subtitle_style_set_new ();
1555   ttml_update_style_set (element_style, element->style_set,
1556       cellres_x, cellres_y);
1557   sub_element = gst_subtitle_element_new (element_style, buffer_index,
1558       (element->whitespace_mode != TTML_WHITESPACE_MODE_PRESERVE));
1559 
1560   gst_subtitle_block_add_element (block, sub_element);
1561   GST_CAT_DEBUG (ttmlparse_debug,
1562       "Added element to block; there are now %u elements in the block.",
1563       gst_subtitle_block_get_element_count (block));
1564   return TRUE;
1565 }
1566 
1567 
1568 /* Return TRUE if @color is totally transparent. */
1569 static gboolean
ttml_color_is_transparent(const GstSubtitleColor * color)1570 ttml_color_is_transparent (const GstSubtitleColor * color)
1571 {
1572   if (!color)
1573     return FALSE;
1574   else
1575     return (color->a == 0);
1576 }
1577 
1578 
1579 /* Blend @color2 over @color1 and return the resulting color. This is currently
1580  * a dummy implementation that simply returns color2 as long as it's
1581  * not fully transparent. */
1582 /* TODO: Implement actual blending of colors. */
1583 static GstSubtitleColor
ttml_blend_colors(GstSubtitleColor color1,GstSubtitleColor color2)1584 ttml_blend_colors (GstSubtitleColor color1, GstSubtitleColor color2)
1585 {
1586   if (ttml_color_is_transparent (&color2))
1587     return color1;
1588   else
1589     return color2;
1590 }
1591 
1592 
1593 static void
ttml_warn_of_mispositioned_element(TtmlElement * element)1594 ttml_warn_of_mispositioned_element (TtmlElement * element)
1595 {
1596   gchar *type = ttml_get_element_type_string (element);
1597   GST_CAT_WARNING (ttmlparse_debug, "Ignoring illegally positioned %s element.",
1598       type);
1599   g_free (type);
1600 }
1601 
1602 
1603 /* Create the subtitle region and its child blocks and elements for @tree,
1604  * inserting element text in @buf. Ownership of created region is transferred
1605  * to caller. */
1606 static GstSubtitleRegion *
ttml_create_subtitle_region(GNode * tree,GstBuffer * buf,guint cellres_x,guint cellres_y)1607 ttml_create_subtitle_region (GNode * tree, GstBuffer * buf, guint cellres_x,
1608     guint cellres_y)
1609 {
1610   GstSubtitleRegion *region = NULL;
1611   GstSubtitleStyleSet *region_style;
1612   GstSubtitleColor block_color;
1613   TtmlElement *element;
1614   GNode *node;
1615 
1616   element = tree->data;         /* Region element */
1617   region_style = gst_subtitle_style_set_new ();
1618   ttml_update_style_set (region_style, element->style_set, cellres_x,
1619       cellres_y);
1620   region = gst_subtitle_region_new (region_style);
1621 
1622   node = tree->children;
1623   if (!node)
1624     return region;
1625 
1626   element = node->data;         /* Body element */
1627   block_color =
1628       ttml_parse_colorstring (ttml_style_set_get_attr (element->style_set,
1629           "backgroundColor"));
1630 
1631   for (node = node->children; node; node = node->next) {
1632     GNode *p_node;
1633     GstSubtitleColor div_color;
1634 
1635     element = node->data;
1636     if (element->type != TTML_ELEMENT_TYPE_DIV) {
1637       ttml_warn_of_mispositioned_element (element);
1638       continue;
1639     }
1640     div_color =
1641         ttml_parse_colorstring (ttml_style_set_get_attr (element->style_set,
1642             "backgroundColor"));
1643     block_color = ttml_blend_colors (block_color, div_color);
1644 
1645     for (p_node = node->children; p_node; p_node = p_node->next) {
1646       GstSubtitleBlock *block = NULL;
1647       GstSubtitleStyleSet *block_style;
1648       GNode *content_node;
1649       GstSubtitleColor p_color;
1650 
1651       element = p_node->data;
1652       if (element->type != TTML_ELEMENT_TYPE_P) {
1653         ttml_warn_of_mispositioned_element (element);
1654         continue;
1655       }
1656       p_color =
1657           ttml_parse_colorstring (ttml_style_set_get_attr (element->style_set,
1658               "backgroundColor"));
1659       block_color = ttml_blend_colors (block_color, p_color);
1660 
1661       block_style = gst_subtitle_style_set_new ();
1662       ttml_update_style_set (block_style, element->style_set, cellres_x,
1663           cellres_y);
1664       block_style->background_color = block_color;
1665       block = gst_subtitle_block_new (block_style);
1666 
1667       for (content_node = p_node->children; content_node;
1668           content_node = content_node->next) {
1669         GNode *anon_node;
1670         element = content_node->data;
1671 
1672         if (element->type == TTML_ELEMENT_TYPE_BR
1673             || element->type == TTML_ELEMENT_TYPE_ANON_SPAN) {
1674           if (!ttml_add_element (block, element, buf, cellres_x, cellres_y))
1675             GST_CAT_WARNING (ttmlparse_debug,
1676                 "Failed to add element to buffer.");
1677         } else if (element->type == TTML_ELEMENT_TYPE_SPAN) {
1678           /* Loop through anon-span children of this span. */
1679           for (anon_node = content_node->children; anon_node;
1680               anon_node = anon_node->next) {
1681             element = anon_node->data;
1682 
1683             if (element->type == TTML_ELEMENT_TYPE_BR
1684                 || element->type == TTML_ELEMENT_TYPE_ANON_SPAN) {
1685               if (!ttml_add_element (block, element, buf, cellres_x, cellres_y))
1686                 GST_CAT_WARNING (ttmlparse_debug,
1687                     "Failed to add element to buffer.");
1688             } else {
1689               ttml_warn_of_mispositioned_element (element);
1690             }
1691           }
1692         } else {
1693           ttml_warn_of_mispositioned_element (element);
1694         }
1695       }
1696 
1697       if (gst_subtitle_block_get_element_count (block) > 0) {
1698         gst_subtitle_region_add_block (region, block);
1699         GST_CAT_DEBUG (ttmlparse_debug,
1700             "Added block to region; there are now %u blocks in the region.",
1701             gst_subtitle_region_get_block_count (region));
1702       } else {
1703         gst_subtitle_block_unref (block);
1704       }
1705     }
1706   }
1707 
1708   return region;
1709 }
1710 
1711 
1712 /* For each scene, create data objects to describe the layout and styling of
1713  * that scene and attach it as metadata to the GstBuffer that will be used to
1714  * carry that scene's text. */
1715 static void
ttml_attach_scene_metadata(GList * scenes,guint cellres_x,guint cellres_y)1716 ttml_attach_scene_metadata (GList * scenes, guint cellres_x, guint cellres_y)
1717 {
1718   GList *scene_entry;
1719 
1720   for (scene_entry = g_list_first (scenes); scene_entry;
1721       scene_entry = scene_entry->next) {
1722     TtmlScene *scene = scene_entry->data;
1723     GList *region_tree;
1724     GPtrArray *regions = g_ptr_array_new_with_free_func (
1725         (GDestroyNotify) gst_subtitle_region_unref);
1726 
1727     scene->buf = gst_buffer_new ();
1728     GST_BUFFER_PTS (scene->buf) = scene->begin;
1729     GST_BUFFER_DURATION (scene->buf) = (scene->end - scene->begin);
1730 
1731     for (region_tree = g_list_first (scene->trees); region_tree;
1732         region_tree = region_tree->next) {
1733       GNode *tree = (GNode *) region_tree->data;
1734       GstSubtitleRegion *region;
1735 
1736       region = ttml_create_subtitle_region (tree, scene->buf, cellres_x,
1737           cellres_y);
1738       if (region)
1739         g_ptr_array_add (regions, region);
1740     }
1741 
1742     gst_buffer_add_subtitle_meta (scene->buf, regions);
1743   }
1744 }
1745 
1746 
1747 static GList *
create_buffer_list(GList * scenes)1748 create_buffer_list (GList * scenes)
1749 {
1750   GList *ret = NULL;
1751 
1752   while (scenes) {
1753     TtmlScene *scene = scenes->data;
1754     ret = g_list_prepend (ret, gst_buffer_ref (scene->buf));
1755     scenes = scenes->next;
1756   }
1757   return g_list_reverse (ret);
1758 }
1759 
1760 
1761 static void
ttml_delete_scene(TtmlScene * scene)1762 ttml_delete_scene (TtmlScene * scene)
1763 {
1764   if (scene->trees)
1765     g_list_free_full (scene->trees, (GDestroyNotify) ttml_delete_tree);
1766   if (scene->buf)
1767     gst_buffer_unref (scene->buf);
1768   g_slice_free (TtmlScene, scene);
1769 }
1770 
1771 
1772 static void
ttml_assign_region_times(GList * region_trees,GstClockTime doc_begin,GstClockTime doc_duration)1773 ttml_assign_region_times (GList * region_trees, GstClockTime doc_begin,
1774     GstClockTime doc_duration)
1775 {
1776   GList *tree;
1777 
1778   for (tree = g_list_first (region_trees); tree; tree = tree->next) {
1779     GNode *region_node = (GNode *) tree->data;
1780     TtmlElement *region = (TtmlElement *) region_node->data;
1781     const gchar *show_background_value =
1782         ttml_style_set_get_attr (region->style_set, "showBackground");
1783     gboolean always_visible =
1784         (g_strcmp0 (show_background_value, "whenActive") != 0);
1785 
1786     GstSubtitleColor region_color = { 0, 0, 0, 0 };
1787     if (ttml_style_set_contains_attr (region->style_set, "backgroundColor"))
1788       region_color =
1789           ttml_parse_colorstring (ttml_style_set_get_attr (region->style_set,
1790               "backgroundColor"));
1791 
1792     if (always_visible && !ttml_color_is_transparent (&region_color)) {
1793       GST_CAT_DEBUG (ttmlparse_debug, "Assigning times to region.");
1794       /* If the input XML document was not encapsulated in a container that
1795        * provides timing information for the document as a whole (i.e., its
1796        * PTS and duration) and the region background should be always visible,
1797        * set region start time to 0 and end time to 24 hours. This ensures that
1798        * regions with showBackground="always" are visible for the entirety of
1799        * any real-world stream. */
1800       region->begin = (doc_begin != GST_CLOCK_TIME_NONE) ? doc_begin : 0;
1801       region->end = (doc_duration != GST_CLOCK_TIME_NONE) ?
1802           region->begin + doc_duration : NSECONDS_IN_DAY;
1803     }
1804   }
1805 }
1806 
1807 
1808 /*
1809  * Promotes @node to the position of its parent, setting the prev, next and
1810  * parent pointers of @node to that of its original parent. The replaced parent
1811  * is freed. Should be called only on nodes that are the sole child of their
1812  * parent, otherwise sibling nodes may be leaked.
1813  */
1814 static void
ttml_promote_node(GNode * node)1815 ttml_promote_node (GNode * node)
1816 {
1817   GNode *parent_node = node->parent;
1818   TtmlElement *parent_element;
1819 
1820   if (!parent_node)
1821     return;
1822   parent_element = (TtmlElement *) parent_node->data;
1823 
1824   node->prev = parent_node->prev;
1825   if (!node->prev)
1826     parent_node->parent->children = node;
1827   else
1828     node->prev->next = node;
1829   node->next = parent_node->next;
1830   if (node->next)
1831     node->next->prev = node;
1832   node->parent = parent_node->parent;
1833 
1834   parent_node->prev = parent_node->next = NULL;
1835   parent_node->parent = parent_node->children = NULL;
1836   g_node_destroy (parent_node);
1837   ttml_delete_element (parent_element);
1838 }
1839 
1840 
1841 /*
1842  * Returns TRUE if @element is of a type that can be joined with another
1843  * joinable element.
1844  */
1845 static gboolean
ttml_element_is_joinable(TtmlElement * element)1846 ttml_element_is_joinable (TtmlElement * element)
1847 {
1848   return element->type == TTML_ELEMENT_TYPE_ANON_SPAN ||
1849       element->type == TTML_ELEMENT_TYPE_BR;
1850 }
1851 
1852 
1853 /* Joins adjacent inline element in @tree that have the same styling. */
1854 static void
ttml_join_region_tree_inline_elements(GNode * tree)1855 ttml_join_region_tree_inline_elements (GNode * tree)
1856 {
1857   GNode *n1, *n2;
1858 
1859   for (n1 = tree; n1; n1 = n1->next) {
1860     if (n1->children) {
1861       TtmlElement *element = (TtmlElement *) n1->data;
1862       ttml_join_region_tree_inline_elements (n1->children);
1863       if (element->type == TTML_ELEMENT_TYPE_SPAN &&
1864           g_node_n_children (n1) == 1) {
1865         GNode *child = n1->children;
1866         if (n1 == tree)
1867           tree = child;
1868         ttml_promote_node (child);
1869         n1 = child;
1870       }
1871     }
1872   }
1873 
1874   n1 = tree;
1875   n2 = tree->next;
1876 
1877   while (n1 && n2) {
1878     TtmlElement *e1 = (TtmlElement *) n1->data;
1879     TtmlElement *e2 = (TtmlElement *) n2->data;
1880 
1881     if (ttml_element_is_joinable (e1) &&
1882         ttml_element_is_joinable (e2) && ttml_element_styles_match (e1, e2)) {
1883       gchar *tmp = e1->text;
1884       GST_CAT_LOG (ttmlparse_debug,
1885           "Joining adjacent element text \"%s\" & \"%s\"", e1->text, e2->text);
1886       e1->text = g_strconcat (e1->text, e2->text, NULL);
1887       e1->type = TTML_ELEMENT_TYPE_ANON_SPAN;
1888       g_free (tmp);
1889 
1890       ttml_delete_element (e2);
1891       g_node_destroy (n2);
1892       n2 = n1->next;
1893     } else {
1894       n1 = n2;
1895       n2 = n2->next;
1896     }
1897   }
1898 }
1899 
1900 
1901 static void
ttml_join_inline_elements(GList * scenes)1902 ttml_join_inline_elements (GList * scenes)
1903 {
1904   GList *scene_entry;
1905 
1906   for (scene_entry = g_list_first (scenes); scene_entry;
1907       scene_entry = scene_entry->next) {
1908     TtmlScene *scene = scene_entry->data;
1909     GList *region_tree;
1910 
1911     for (region_tree = g_list_first (scene->trees); region_tree;
1912         region_tree = region_tree->next) {
1913       GNode *tree = (GNode *) region_tree->data;
1914       ttml_join_region_tree_inline_elements (tree);
1915     }
1916   }
1917 }
1918 
1919 
1920 static xmlNodePtr
ttml_find_child(xmlNodePtr parent,const gchar * name)1921 ttml_find_child (xmlNodePtr parent, const gchar * name)
1922 {
1923   xmlNodePtr child = parent->children;
1924   while (child && xmlStrcmp (child->name, (const xmlChar *) name) != 0)
1925     child = child->next;
1926   return child;
1927 }
1928 
1929 
1930 GList *
ttml_parse(const gchar * input,GstClockTime begin,GstClockTime duration)1931 ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration)
1932 {
1933   xmlDocPtr doc;
1934   xmlNodePtr root_node, head_node, body_node;
1935 
1936   GHashTable *styles_table, *regions_table;
1937   GList *output_buffers = NULL;
1938   gchar *value;
1939   guint cellres_x, cellres_y;
1940   TtmlWhitespaceMode doc_whitespace_mode = TTML_WHITESPACE_MODE_DEFAULT;
1941 
1942   if (!g_utf8_validate (input, -1, NULL)) {
1943     GST_CAT_ERROR (ttmlparse_debug, "Input isn't valid UTF-8.");
1944     return NULL;
1945   }
1946   GST_CAT_LOG (ttmlparse_debug, "Input:\n%s", input);
1947 
1948   styles_table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
1949       (GDestroyNotify) ttml_delete_element);
1950   regions_table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
1951       (GDestroyNotify) ttml_delete_element);
1952 
1953   /* Parse input. */
1954   doc = xmlReadMemory (input, strlen (input), "any_doc_name", NULL, 0);
1955   if (!doc) {
1956     GST_CAT_ERROR (ttmlparse_debug, "Failed to parse document.");
1957     return NULL;
1958   }
1959   root_node = xmlDocGetRootElement (doc);
1960 
1961   if (xmlStrcmp (root_node->name, (const xmlChar *) "tt") != 0) {
1962     GST_CAT_ERROR (ttmlparse_debug, "Root element of document is not tt:tt.");
1963     xmlFreeDoc (doc);
1964     return NULL;
1965   }
1966 
1967   if ((value = ttml_get_xml_property (root_node, "cellResolution"))) {
1968     gchar *ptr = value;
1969     cellres_x = (guint) g_ascii_strtoull (ptr, &ptr, 10U);
1970     cellres_y = (guint) g_ascii_strtoull (ptr, NULL, 10U);
1971     g_free (value);
1972   } else {
1973     cellres_x = DEFAULT_CELLRES_X;
1974     cellres_y = DEFAULT_CELLRES_Y;
1975   }
1976 
1977   GST_CAT_DEBUG (ttmlparse_debug, "cellres_x: %u   cellres_y: %u", cellres_x,
1978       cellres_y);
1979 
1980   if ((value = ttml_get_xml_property (root_node, "space"))) {
1981     if (g_strcmp0 (value, "preserve") == 0) {
1982       GST_CAT_DEBUG (ttmlparse_debug, "Preserving whitespace...");
1983       doc_whitespace_mode = TTML_WHITESPACE_MODE_PRESERVE;
1984     }
1985     g_free (value);
1986   }
1987 
1988   if (!(head_node = ttml_find_child (root_node, "head"))) {
1989     GST_CAT_ERROR (ttmlparse_debug, "No <head> element found.");
1990     xmlFreeDoc (doc);
1991     return NULL;
1992   }
1993   ttml_parse_head (head_node, styles_table, regions_table);
1994 
1995   if ((body_node = ttml_find_child (root_node, "body"))) {
1996     GNode *body_tree;
1997     GList *region_trees = NULL;
1998     GList *scenes = NULL;
1999 
2000     body_tree = ttml_parse_body (body_node);
2001     GST_CAT_LOG (ttmlparse_debug, "body_tree tree contains %u nodes.",
2002         g_node_n_nodes (body_tree, G_TRAVERSE_ALL));
2003     GST_CAT_LOG (ttmlparse_debug, "body_tree tree height is %u",
2004         g_node_max_height (body_tree));
2005 
2006     ttml_inherit_whitespace_mode (body_tree, doc_whitespace_mode);
2007     ttml_handle_whitespace (body_tree);
2008     ttml_filter_content_nodes (body_tree);
2009     if (GST_CLOCK_TIME_IS_VALID (begin) && GST_CLOCK_TIME_IS_VALID (duration))
2010       ttml_apply_time_window (body_tree, begin, begin + duration);
2011     ttml_resolve_timings (body_tree);
2012     ttml_resolve_regions (body_tree);
2013     region_trees = ttml_split_body_by_region (body_tree, regions_table);
2014     ttml_resolve_referenced_styles (region_trees, styles_table);
2015     ttml_inherit_element_styles (region_trees);
2016     ttml_assign_region_times (region_trees, begin, duration);
2017     scenes = ttml_create_scenes (region_trees);
2018     GST_CAT_LOG (ttmlparse_debug, "There are %u scenes in all.",
2019         g_list_length (scenes));
2020     ttml_join_inline_elements (scenes);
2021     ttml_attach_scene_metadata (scenes, cellres_x, cellres_y);
2022     output_buffers = create_buffer_list (scenes);
2023 
2024     g_list_free_full (scenes, (GDestroyNotify) ttml_delete_scene);
2025     g_list_free_full (region_trees, (GDestroyNotify) ttml_delete_tree);
2026     ttml_delete_tree (body_tree);
2027   }
2028 
2029   xmlFreeDoc (doc);
2030   g_hash_table_destroy (styles_table);
2031   g_hash_table_destroy (regions_table);
2032 
2033   return output_buffers;
2034 }
2035