/* GStreamer License Utility Functions
 * Copyright (C) 2011 Tim-Philipp Müller <tim centricular net>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
19 
/* mklicensestables.c:
 * little program that reads liblicense's license RDF files and outputs tables
 * with the most important information, so we don't have to parse megabytes
 * of mostly redundant RDF files to get some basic information (and vendors
 * don't have to ship it all).
 */
26 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "tag.h"

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
36 
/* TODO: we can merge some of the jurisdiction-only license table entries
 * into one entry with multiple jurisdictions and without the 'generic' flag,
 * e.g. by-nc-nd/2.5/es + by-nc-nd/2.5/au => by-nc-nd/2.5/{es,au} */
40 
41 #define LIBLICENSE_DATA_PREFIX "/usr/share/liblicense/licenses"
42 
43 static GHashTable *unknown_sources;     /* NULL */
44 
45 static GList *licenses;         /* NULL */
46 
47 /* list of languages used for translations */
48 static GList *langs;            /* NULL */
49 
50 /* keep in sync with licenses.c */
51 static const gchar jurisdictions[] =
52     "ar\000at\000au\000be\000bg\000br\000ca\000ch\000cl\000cn\000co\000de\000"
53     "dk\000es\000fi\000fr\000hr\000hu\000il\000in\000it\000jp\000kr\000mk\000"
54     "mt\000mx\000my\000nl\000pe\000pl\000pt\000scotland\000se\000si\000tw\000"
55     "uk\000us\000za";
56 
57 /* keep in sync with gst_tag_get_license_version() */
58 static const gchar known_versions[] = "1.0/2.0/2.1/2.5/3.0/";
59 
60 /* is this license 'generic' (and a base for any of the supported
61  * jurisdictions), or jurisdiction-specific only? */
62 #define JURISDICTION_GENERIC (G_GUINT64_CONSTANT (1) << 63)
63 
64 typedef struct
65 {
66   gchar *ref;
67   guint64 jurisdiction;
68   gchar *jurisdiction_suffix;   /* if not generic (e.g. "jp/") */
69   gchar *legalcode;
70   gchar *version;
71   gchar *replaced_by;
72   gchar *source;
73 
74   GstTagLicenseFlags flags;
75 
76   gboolean deprecated;
77 
78   GHashTable *titles;
79   GHashTable *descriptions;
80 
81   /* for processing */
82   const gchar *cur_lang;
83   gboolean packed_into_source;
84 
85   /* list of licenses packed into this one (ie. this is the source of those) */
86   GList *derived;
87 } License;
88 
89 static GstTagLicenseFlags
ref_to_flag(const gchar * ref)90 ref_to_flag (const gchar * ref)
91 {
92   if (strcmp (ref, "http://creativecommons.org/ns#Reproduction") == 0)
93     return GST_TAG_LICENSE_PERMITS_REPRODUCTION;
94   if (strcmp (ref, "http://creativecommons.org/ns#Distribution") == 0)
95     return GST_TAG_LICENSE_PERMITS_DISTRIBUTION;
96   if (strcmp (ref, "http://creativecommons.org/ns#DerivativeWorks") == 0)
97     return GST_TAG_LICENSE_PERMITS_DERIVATIVE_WORKS;
98   if (strcmp (ref, "http://creativecommons.org/ns#Sharing") == 0)
99     return GST_TAG_LICENSE_PERMITS_SHARING;
100   if (strcmp (ref, "http://creativecommons.org/ns#Notice") == 0)
101     return GST_TAG_LICENSE_REQUIRES_NOTICE;
102   if (strcmp (ref, "http://creativecommons.org/ns#Attribution") == 0)
103     return GST_TAG_LICENSE_REQUIRES_ATTRIBUTION;
104   if (strcmp (ref, "http://creativecommons.org/ns#ShareAlike") == 0)
105     return GST_TAG_LICENSE_REQUIRES_SHARE_ALIKE;
106   if (strcmp (ref, "http://creativecommons.org/ns#SourceCode") == 0)
107     return GST_TAG_LICENSE_REQUIRES_SOURCE_CODE;
108   if (strcmp (ref, "http://creativecommons.org/ns#Copyleft") == 0)
109     return GST_TAG_LICENSE_REQUIRES_COPYLEFT;
110   if (strcmp (ref, "http://creativecommons.org/ns#LesserCopyleft") == 0)
111     return GST_TAG_LICENSE_REQUIRES_LESSER_COPYLEFT;
112   if (strcmp (ref, "http://creativecommons.org/ns#CommercialUse") == 0)
113     return GST_TAG_LICENSE_PROHIBITS_COMMERCIAL_USE;
114   if (strcmp (ref, "http://creativecommons.org/ns#HighIncomeNationUse") == 0)
115     return GST_TAG_LICENSE_PROHIBITS_HIGH_INCOME_NATION_USE;
116 
117   g_error ("Unknown permits/requires/prohibits: %s\n", ref);
118   return 0;
119 };
120 
121 static guint64
ref_to_jurisdiction(const gchar * ref)122 ref_to_jurisdiction (const gchar * ref)
123 {
124   const gchar *j = jurisdictions;
125   gchar *jur;
126   guint64 bit = 1;
127 
128   jur = g_strdup (ref + strlen ("http://creativecommons.org/international/"));
129   g_strdelimit (jur, "/", '\0');
130   while (j < jurisdictions + sizeof (jurisdictions)) {
131     if (strcmp (j, jur) == 0) {
132       g_free (jur);
133       g_assert (bit != 0 && bit != JURISDICTION_GENERIC);
134       return bit;
135     }
136     j += strlen (j) + 1;
137     bit <<= 1;
138   }
139   g_error ("Unknown jurisdiction '%s'\n", ref);
140   return JURISDICTION_GENERIC;
141 }
142 
/* The kinds of RDF elements the parser callbacks distinguish; tag_map
 * associates element names with these values */
typedef enum
{
  /* cc: namespace */
  TAG_CC_LICENSE,
  TAG_CC_JURISDICTION,
  TAG_CC_LEGALCODE,
  TAG_CC_PROHIBITS,
  TAG_CC_REQUIRES,
  TAG_CC_PERMITS,
  TAG_CC_DEPRECATED_ON,
  /* dc: namespace */
  TAG_DC_CREATOR,
  TAG_DC_SOURCE,
  TAG_DC_TITLE,
  TAG_DC_DESCRIPTION,
  /* dcq: namespace */
  TAG_DCQ_HAS_VERSION,
  TAG_DCQ_IS_REPLACED_BY,
  /* rdf: namespace */
  TAG_RDF_RDF,
  TAG_RDF_DESCRIPTION,
} Tag;
161 
162 static const struct
163 {
164   const gchar *element_name;
165   const gchar *attribute;
166   const Tag element_tag;
167 } tag_map[] = {
168   {
169   "cc:License", "rdf:about", TAG_CC_LICENSE}, {
170   "cc:deprecatedOn", "rdf:datatype", TAG_CC_DEPRECATED_ON}, {
171   "cc:jurisdiction", "rdf:resource", TAG_CC_JURISDICTION}, {
172   "cc:legalcode", "rdf:resource", TAG_CC_LEGALCODE}, {
173   "cc:prohibits", "rdf:resource", TAG_CC_PROHIBITS}, {
174   "cc:requires", "rdf:resource", TAG_CC_REQUIRES}, {
175   "cc:permits", "rdf:resource", TAG_CC_PERMITS}, {
176   "dc:creator", "rdf:resource", TAG_DC_CREATOR}, {
177   "dc:source", "rdf:resource", TAG_DC_SOURCE}, {
178   "dc:title", "xml:lang", TAG_DC_TITLE}, {
179   "dc:description", "xml:lang", TAG_DC_DESCRIPTION}, {
180   "dcq:hasVersion", NULL, TAG_DCQ_HAS_VERSION}, {
181   "dcq:isReplacedBy", "rdf:resource", TAG_DCQ_IS_REPLACED_BY}, {
182   "rdf:RDF", NULL, TAG_RDF_RDF}, {
183   "rdf:Description", "rdf:about", TAG_RDF_DESCRIPTION},
184       /* these three are just for by-nc-nd_2.0_jp_.rdf */
185   {
186   "dc:isBasedOn", "rdf:resource", TAG_DC_SOURCE}, {
187   "dc:hasVersion", NULL, TAG_DCQ_HAS_VERSION}, {
188   "dc:isReplacedBy", "rdf:resource", TAG_DCQ_IS_REPLACED_BY}
189 };
190 
191 static void
parse_start(GMarkupParseContext * ctx,const gchar * element_name,const gchar ** attr_names,const gchar ** attr_vals,gpointer user_data,GError ** err)192 parse_start (GMarkupParseContext * ctx, const gchar * element_name,
193     const gchar ** attr_names, const gchar ** attr_vals,
194     gpointer user_data, GError ** err)
195 {
196   License *license = user_data;
197   const gchar *ref = NULL;
198   int i;
199 
200   for (i = 0; i < G_N_ELEMENTS (tag_map); ++i) {
201     if (strcmp (element_name, tag_map[i].element_name) == 0)
202       break;
203   }
204 
205   if (i == G_N_ELEMENTS (tag_map))
206     g_error ("Unexpected tag '%s'\n", element_name);
207 
208   if (tag_map[i].attribute == NULL)
209     return;
210 
211   if (!g_markup_collect_attributes (element_name, attr_names, attr_vals,
212           err, G_MARKUP_COLLECT_STRING, tag_map[i].attribute, &ref,
213           G_MARKUP_COLLECT_INVALID)) {
214     return;
215   }
216 
217   switch (tag_map[i].element_tag) {
218     case TAG_CC_LICENSE:
219       if (!g_str_has_prefix (ref, "http://creativecommons.org/licenses/"))
220         g_error ("Unexpected license reference: %s\n", ref);
221       /* we assume one license per file, and CC license ref */
222       g_assert (license->ref == NULL);
223       license->ref = g_strdup (ref);
224       break;
225     case TAG_CC_JURISDICTION:
226       if (!g_str_has_prefix (ref, "http://creativecommons.org/international/"))
227         g_error ("Unknown license jurisdiction: %s\n", ref);
228       /* we assume one jurisdiction per license */
229       g_assert (license->jurisdiction == JURISDICTION_GENERIC);
230       license->jurisdiction = ref_to_jurisdiction (ref);
231       license->jurisdiction_suffix =
232           g_strdup (ref + strlen ("http://creativecommons.org/international/"));
233       break;
234     case TAG_CC_LEGALCODE:
235       if (!g_str_has_prefix (ref, "http://creativecommons.org/licenses/"))
236         g_error ("Unexpected legalcode reference: %s\n", ref);
237       /* we assume one legalcode per license */
238       g_assert (license->legalcode == NULL);
239       license->legalcode = g_strdup (ref);
240       break;
241     case TAG_DC_CREATOR:
242       if (strcmp (ref, "http://creativecommons.org") == 0) {
243         license->flags |= GST_TAG_LICENSE_CREATIVE_COMMONS_LICENSE;
244       } else if (strcmp (ref, "http://fsf.org") == 0) {
245         license->flags |= GST_TAG_LICENSE_FREE_SOFTWARE_FOUNDATION_LICENSE;
246       } else {
247         g_error ("Unknown license creator: %s\n", ref);
248       }
249       break;
250     case TAG_CC_DEPRECATED_ON:
251       break;
252     case TAG_CC_PROHIBITS:
253     case TAG_CC_REQUIRES:
254     case TAG_CC_PERMITS:
255       license->flags |= ref_to_flag (ref);
256       break;
257     case TAG_DC_TITLE:{
258       gchar *cur_lang;
259 
260       cur_lang = g_strdelimit (g_strdup (ref), "-", '_');
261       license->cur_lang = g_intern_string (cur_lang);
262       if (!g_list_find_custom (langs, cur_lang, (GCompareFunc) strcmp))
263         langs = g_list_prepend (langs, (gpointer) license->cur_lang);
264 
265       g_free (cur_lang);
266       break;
267     }
268     case TAG_DC_DESCRIPTION:{
269       gchar *cur_lang;
270 
271       cur_lang = g_strdelimit (g_strdup (ref), "-", '_');
272       license->cur_lang = g_intern_string (cur_lang);
273       if (!g_list_find_custom (langs, cur_lang, (GCompareFunc) strcmp))
274         langs = g_list_prepend (langs, (gpointer) license->cur_lang);
275 
276       g_free (cur_lang);
277       break;
278     }
279     case TAG_DCQ_IS_REPLACED_BY:
280       /* we assume one replacer per license for now */
281       g_assert (license->replaced_by == NULL);
282       license->replaced_by = g_strdup (ref);
283       break;
284     case TAG_RDF_DESCRIPTION:
285       if (!g_str_has_prefix (ref, "http://creativecommons.org/licenses/"))
286         g_error ("Unexpected license reference: %s\n", ref);
287       if (license->ref != NULL && strcmp (license->ref, ref) != 0) {
288         gchar *f, *r = g_strdup (ref);
289 
290         /* work around bug in some of the RDFs ... */
291         if ((f = strstr (r, "by-nc-nd"))) {
292           memcpy (f, "by-nd-nc", 8);
293         }
294         if (strcmp (license->ref, r) != 0) {
295           g_error ("rdf:Description chunk for other than current license");
296         }
297         g_free (r);
298       }
299       break;
300     case TAG_DC_SOURCE:
301       if (!g_str_has_prefix (ref, "http://creativecommons.org/licenses/"))
302         g_error ("Unexpected source reference: %s\n", ref);
303       /* we assume one source (for jurisdiction-specific versions) */
304       g_assert (license->source == NULL);
305       license->source = g_strdup (ref);
306       break;
307     default:
308       g_printerr ("unhandled start tag: %s\n", element_name);
309       break;
310   }
311 }
312 
313 static void
parse_text(GMarkupParseContext * ctx,const gchar * text,gsize text_len,gpointer user_data,GError ** err)314 parse_text (GMarkupParseContext * ctx, const gchar * text, gsize text_len,
315     gpointer user_data, GError ** err)
316 {
317   License *license = user_data;
318   const gchar *element_name, *found;
319   int i;
320 
321   element_name = g_markup_parse_context_get_element (ctx);
322   for (i = 0; i < G_N_ELEMENTS (tag_map); ++i) {
323     if (strcmp (element_name, tag_map[i].element_name) == 0)
324       break;
325   }
326 
327   if (i == G_N_ELEMENTS (tag_map))
328     g_error ("Unexpected tag '%s'\n", element_name);
329 
330   switch (tag_map[i].element_tag) {
331     case TAG_CC_LICENSE:
332     case TAG_CC_JURISDICTION:
333     case TAG_CC_LEGALCODE:
334     case TAG_DC_CREATOR:
335     case TAG_CC_PROHIBITS:
336     case TAG_CC_REQUIRES:
337     case TAG_CC_PERMITS:
338     case TAG_RDF_RDF:
339     case TAG_RDF_DESCRIPTION:
340       break;
341     case TAG_DC_TITLE:
342       if (license->titles == NULL) {
343         license->titles = g_hash_table_new (g_str_hash, g_str_equal);
344       }
345       g_hash_table_insert (license->titles, (gpointer) license->cur_lang,
346           (gpointer) g_intern_string (text));
347       break;
348     case TAG_DC_DESCRIPTION:{
349       gchar *txt = g_strdup (text);
350 
351       if (license->descriptions == NULL) {
352         license->descriptions = g_hash_table_new (g_str_hash, g_str_equal);
353       }
354       g_strdelimit (txt, "\n", ' ');
355       g_hash_table_insert (license->descriptions, (gpointer) license->cur_lang,
356           (gpointer) g_intern_string (txt));
357       g_free (txt);
358       break;
359     }
360     case TAG_DCQ_HAS_VERSION:
361       /* we assume one version per license */
362       g_assert (license->version == NULL);
363       license->version = g_strdup (text);
364       found = strstr (known_versions, license->version);
365       if (found == NULL || found[strlen (license->version)] != '/')
366         g_error ("Unexpected version '%s', please add to table.", text);
367       break;
368     case TAG_CC_DEPRECATED_ON:
369       license->deprecated = TRUE;
370       break;
371     case TAG_DC_SOURCE:        // FIXME
372     default:
373       g_print ("text (%s) (%s): '%s'\n", element_name, license->cur_lang, text);
374   }
375 }
376 
377 static void
parse_passthrough(GMarkupParseContext * ctx,const gchar * text,gsize len,gpointer user_data,GError ** err)378 parse_passthrough (GMarkupParseContext * ctx, const gchar * text, gsize len,
379     gpointer user_data, GError ** err)
380 {
381   if (!g_str_has_prefix (text, "<?xml ")) {
382     g_error ("Unexpected passthrough text: %s\n", text);
383   }
384 }
385 
386 static void
parse_error(GMarkupParseContext * ctx,GError * err,gpointer data)387 parse_error (GMarkupParseContext * ctx, GError * err, gpointer data)
388 {
389   g_error ("parse error: %s\n", err->message);
390 }
391 
392 static const GMarkupParser license_rdf_parser = {
393   parse_start, NULL, parse_text, parse_passthrough, parse_error
394 };
395 
396 static void
parse_license_rdf(const gchar * fn,const gchar * rdf)397 parse_license_rdf (const gchar * fn, const gchar * rdf)
398 {
399   GMarkupParseContext *ctx;
400   License *license;
401   GError *err = NULL;
402 
403   if (!g_utf8_validate (rdf, -1, NULL)) {
404     g_error ("%s is not valid UTF-8\n", fn);
405   }
406 
407   license = g_new0 (License, 1);
408 
409   /* mark as generic until proven otherwise */
410   license->jurisdiction = JURISDICTION_GENERIC;
411 
412   ctx = g_markup_parse_context_new (&license_rdf_parser,
413       G_MARKUP_TREAT_CDATA_AS_TEXT, license, NULL);
414 
415   /* g_print ("Parsing %s\n", fn); */
416 
417   if (!g_markup_parse_context_parse (ctx, rdf, -1, &err)) {
418     g_error ("Error parsing file %s: %s\n", fn, err->message);
419     g_clear_error (&err);
420   }
421 
422   licenses = g_list_append (licenses, license);
423 
424   g_markup_parse_context_free (ctx);
425 }
426 
427 static void
read_licenses(const gchar * licenses_dir)428 read_licenses (const gchar * licenses_dir)
429 {
430   const gchar *name;
431   GError *err = NULL;
432   GDir *dir;
433 
434   dir = g_dir_open (licenses_dir, 0, &err);
435 
436   if (dir == NULL)
437     g_error ("Failed to g_dir_open('%s'): %s", licenses_dir, err->message);
438   g_clear_error (&err);
439 
440   while ((name = g_dir_read_name (dir))) {
441     gchar *fn, *rdf;
442 
443     fn = g_build_filename (licenses_dir, name, NULL);
444     if (g_file_get_contents (fn, &rdf, NULL, &err)) {
445       parse_license_rdf (fn, rdf);
446       g_free (rdf);
447     } else {
448       g_printerr ("Could not read file '%s': %s\n", fn, err->message);
449       g_clear_error (&err);
450       err = NULL;
451     }
452     g_free (fn);
453   }
454 
455   g_dir_close (dir);
456 }
457 
458 static License *
find_license(const gchar * ref)459 find_license (const gchar * ref)
460 {
461   GList *l;
462 
463   if (!g_str_has_prefix (ref, "http://creativecommons.org/"))
464     return NULL;
465 
466   for (l = licenses; l != NULL; l = l->next) {
467     License *license = l->data;
468 
469     if (strcmp (license->ref, ref) == 0)
470       return license;
471   }
472 
473   return NULL;
474 }
475 
476 static int
license_ref_cmp(License * a,License * b)477 license_ref_cmp (License * a, License * b)
478 {
479   return strcmp (a->ref, b->ref);
480 }
481 
482 #define STRING_TABLE_MAX_STRINGS 100
483 typedef struct
484 {
485   GString *s;
486   guint num_escaped;
487   guint num_strings;
488   guint indices[STRING_TABLE_MAX_STRINGS];
489   gchar *strings[STRING_TABLE_MAX_STRINGS];     /* unescaped strings */
490 } StringTable;
491 
492 static StringTable *
string_table_new(void)493 string_table_new (void)
494 {
495   StringTable *t = g_new0 (StringTable, 1);
496 
497   t->s = g_string_new (NULL);
498   return t;
499 }
500 
501 static void
string_table_free(StringTable * t)502 string_table_free (StringTable * t)
503 {
504   int i;
505 
506   for (i = 0; i < t->num_strings; ++i)
507     g_free (t->strings[i]);
508 
509   g_string_free (t->s, TRUE);
510   g_free (t);
511 }
512 
513 static guint
string_table_add_string(StringTable * t,const gchar * str)514 string_table_add_string (StringTable * t, const gchar * str)
515 {
516   const gchar *s;
517   guint idx, i;
518 
519   /* check if we already have this string */
520   for (i = 0; i < t->num_strings; ++i) {
521     if (strcmp (t->strings[i], str) == 0)
522       return t->indices[i];
523   }
524 
525   /* save current offset */
526   idx = t->s->len;
527 
528   /* adjust for fact that \000 is 4 chars now but will take up only 1 later */
529   idx -= t->num_escaped * 3;
530 
531   /* append one char at a time, making sure to escape UTF-8 characters */
532   for (s = str; s != NULL && *s != '\0'; ++s) {
533     if (g_ascii_isprint (*s) && *s != '"' && *s != '\\') {
534       g_string_append_c (t->s, *s);
535     } else {
536       g_string_append_printf (t->s, "\\%03o", (unsigned char) *s);
537       t->num_escaped++;
538     }
539   }
540   g_string_append (t->s, "\\000");
541   t->num_escaped++;
542 
543   t->indices[t->num_strings] = idx;
544   t->strings[t->num_strings] = g_strdup (str);
545   ++t->num_strings;
546 
547   return idx;
548 }
549 
550 static void
string_table_print(StringTable * t)551 string_table_print (StringTable * t)
552 {
553   const gchar *s;
554 
555   s = t->s->str;
556   while (s != NULL && *s != '\0') {
557     gchar line[74], *lastesc;
558     guint left;
559 
560     left = strlen (s);
561     g_strlcpy (line, s, MIN (left, sizeof (line)));
562     s += sizeof (line) - 1;
563     /* avoid partial escaped codes at the end of a line */
564     if ((lastesc = strrchr (line, '\\')) && strlen (lastesc) < 4) {
565       s -= strlen (lastesc);
566       *lastesc = '\0';
567     }
568     g_print ("  \"%s\"", line);
569     if (left < 74)
570       break;
571     g_print ("\n");
572   }
573   g_print (";\n");
574 }
575 
576 /* skip translation if translated string for e.g. "fr_ca" is same as for "fr" */
577 static gboolean
skip_translation(GHashTable * ht_strings,const gchar * lang,const gchar * trans)578 skip_translation (GHashTable * ht_strings, const gchar * lang,
579     const gchar * trans)
580 {
581   const gchar *simple_trans;
582   gchar *simple_lang;
583 
584   if (strchr (lang, '_') == NULL)
585     return FALSE;
586 
587   simple_lang = g_strdup (lang);
588   g_strdelimit (simple_lang, "_", '\0');
589 
590   simple_trans = g_hash_table_lookup (ht_strings, (gpointer) simple_lang);
591   g_free (simple_lang);
592 
593   return (simple_trans != NULL && strcmp (trans, simple_trans) == 0);
594 }
595 
596 static GVariant *
create_translation_dict(GHashTable * ht_strings,const gchar * en)597 create_translation_dict (GHashTable * ht_strings, const gchar * en)
598 {
599   GVariantBuilder array;
600   guint count = 0;
601   GList *l;
602 
603   g_variant_builder_init (&array, G_VARIANT_TYPE_ARRAY);
604 
605   for (l = langs; l != NULL; l = l->next) {
606     const gchar *trans, *lang;
607 
608     lang = (const gchar *) l->data;
609     trans = g_hash_table_lookup (ht_strings, (gpointer) lang);
610     if (trans != NULL && *trans != '\0' && strcmp (en, trans) != 0 &&
611         !skip_translation (ht_strings, lang, trans)) {
612       /* g_print ("%s (%s) => %s\n", en, lang, trans); */
613       g_variant_builder_add_value (&array,
614           g_variant_new_dict_entry (g_variant_new_string (lang),
615               g_variant_new_string (trans)));
616       ++count;
617     }
618   }
619 
620   if (count == 0) {
621     g_variant_builder_clear (&array);
622     return NULL;
623   }
624 
625   return g_variant_builder_end (&array);
626 }
627 
628 static void
write_translations_dictionary(GList * licenses,const gchar * dict_filename)629 write_translations_dictionary (GList * licenses, const gchar * dict_filename)
630 {
631   /* maps C string => (dictionary of: locale => translation) */
632   GVariantBuilder array;
633   /* maps C string => boolean (if it's in the dictionary already */
634   GHashTable *translations;
635   GVariant *var;
636   GList *l;
637   FILE *f;
638 
639   /* sort langs for prettiness / to make variant dumps easier to read */
640   langs = g_list_sort (langs, (GCompareFunc) strcmp);
641 
642   g_variant_builder_init (&array, G_VARIANT_TYPE_ARRAY);
643 
644   translations = g_hash_table_new (g_str_hash, g_str_equal);
645 
646   for (l = licenses; l != NULL; l = l->next) {
647     const gchar *en;
648     License *license;
649 
650     license = l->data;
651 
652     if (license->packed_into_source)
653       continue;
654 
655     /* add title + translations */
656     en = g_hash_table_lookup (license->titles, "en");
657     g_assert (en != NULL);
658 
659     /* check if we already have added translations for this string */
660     if (!g_hash_table_lookup (translations, (gpointer) en)) {
661       GVariant *trans;
662 
663       trans = create_translation_dict (license->titles, en);
664       if (trans != NULL) {
665         g_variant_builder_add_value (&array,
666             g_variant_new_dict_entry (g_variant_new_string (en), trans));
667         g_hash_table_insert (translations, (gpointer) en,
668             GINT_TO_POINTER (TRUE));
669       }
670     }
671 
672     /* add description + translations */
673     if (license->descriptions == NULL)
674       continue;
675 
676     en = g_hash_table_lookup (license->descriptions, "en");
677     g_assert (en != NULL);
678 
679     /* check if we already have added translations for this string */
680     if (!g_hash_table_lookup (translations, (gpointer) en)) {
681       GVariant *trans;
682 
683       trans = create_translation_dict (license->descriptions, en);
684       if (trans != NULL) {
685         g_variant_builder_add_value (&array,
686             g_variant_new_dict_entry (g_variant_new_string (en), trans));
687         g_hash_table_insert (translations, (gpointer) en,
688             GINT_TO_POINTER (TRUE));
689       }
690     }
691   }
692 
693   var = g_variant_builder_end (&array);
694 
695   f = fopen (dict_filename, "wb");
696   if (fwrite (g_variant_get_data (var), g_variant_get_size (var), 1, f) != 1) {
697     g_error ("failed to write dict to file: %s", g_strerror (errno));
698   }
699   fclose (f);
700 
701   g_printerr ("Wrote dictionary to %s, size: %u, type: %s\n", dict_filename,
702       (guint) g_variant_get_size (var), (gchar *) g_variant_get_type (var));
703 
704   g_variant_unref (var);
705   g_hash_table_destroy (translations);
706 }
707 
708 int
main(int argc,char ** argv)709 main (int argc, char **argv)
710 {
711   gchar *translation_dict_fn = NULL;
712   GOptionContext *ctx;
713   GOptionEntry options[] = {
714     {"translation-dictionary", 0, 0, G_OPTION_ARG_FILENAME,
715           &translation_dict_fn, "Filename of translations dictionary to write",
716         NULL},
717     {NULL}
718   };
719   StringTable *string_table;
720   GError *err = NULL;
721   GList *l;
722   int idx = 0;
723 
724   ctx = g_option_context_new ("");
725   g_option_context_add_main_entries (ctx, options, NULL);
726   if (!g_option_context_parse (ctx, &argc, &argv, &err)) {
727     g_printerr ("Error initializing: %s\n", err->message);
728     g_option_context_free (ctx);
729     g_clear_error (&err);
730     exit (1);
731   }
732   g_option_context_free (ctx);
733 
734   read_licenses (LIBLICENSE_DATA_PREFIX);
735 
736   g_printerr ("%d licenses\n", g_list_length (licenses));
737 
738   unknown_sources = g_hash_table_new (g_str_hash, g_str_equal);
739 
740   for (l = licenses; l != NULL; l = l->next) {
741     License *license = l->data;
742 
743     /* if the license has as source, check if we can 'pack' it into the
744      * original license as a jurisdiction-specific variant */
745     if (license->source != NULL) {
746       License *source = find_license (license->source);
747 
748       if (source != NULL) {
749         if (source->flags != license->flags) {
750           g_printerr ("Source and derived license have different flags:\n"
751               "\t0x%08x : %s\n\t0x%08x : %s\n", source->flags, source->ref,
752               license->flags, license->ref);
753           source = NULL;
754         } else {
755           if (source->descriptions == NULL) {
756             /* neither should the derived one then */
757             g_assert (license->descriptions == NULL);
758           } else {
759             /* make sure we're not settling for fewer descriptions than
760              * there are */
761             g_assert (g_hash_table_size (license->titles) <=
762                 g_hash_table_size (source->titles));
763             g_assert (g_hash_table_size (license->descriptions) <=
764                 g_hash_table_size (source->descriptions));
765           }
766         }
767       } else {
768         /* a source is referenced that we haven't encountered
769          * (possibly a referencing bug? seems to happen e.g. when there's a
770          * 2.1 version of a jurisdiction license and it refers to a 2.1
771          * source version, but there's only a 2.0 or 2.5 source version. So
772          * maybe it's supposed to refer to the 2.0 source then, who knows) */
773         if (!g_hash_table_lookup (unknown_sources, license->source)) {
774           g_printerr ("Unknown source license %s\n", license->source);
775           g_hash_table_insert (unknown_sources, g_strdup (license->source),
776               GUINT_TO_POINTER (TRUE));
777         }
778         /* g_print ("Unknown source license %s referenced from %s\n",
779          * license->source, license->ref); */
780       }
781 
782       /* should we pack this into the source or not */
783       if (source != NULL) {
784         source->jurisdiction |= license->jurisdiction;
785         source->derived = g_list_insert_sorted (source->derived, license,
786             (GCompareFunc) license_ref_cmp);
787         license->packed_into_source = TRUE;
788       }
789     } else {
790       /* no source license */
791       if (license->titles == NULL)
792         g_error ("License has no titles: %s\n", license->ref);
793       if (license->descriptions == NULL)
794         g_printerr ("License %s has no descriptions!\n", license->ref);
795     }
796   }
797 
798   licenses = g_list_sort (licenses, (GCompareFunc) license_ref_cmp);
799 
800   string_table = string_table_new ();
801 
802   g_print ("/* created by mklicensestables.c */\n");
803   g_print ("static const struct {\n"
804       "  /* jurisdictions in addition to the generic version, bitfield */\n"
805       "  const guint64             jurisdictions;\n"
806       "  const GstTagLicenseFlags  flags;\n"
807       "  /* the bit after http://creativecommons.org/licenses/ */\n"
808       "  const gchar               ref[18];\n"
809       "  gint16                    title_idx;  /* index in string table */\n"
810       "  gint16                    desc_idx;   /* index in string table */\n"
811       "} licenses[] = {\n");
812 
813   for (l = licenses; l != NULL; l = l->next) {
814     const gchar *title_en, *desc_en;
815     int idx_title, idx_desc;
816     License *license;
817 
818     license = l->data;
819 
820     if (license->packed_into_source)
821       continue;
822 
823     title_en = g_hash_table_lookup (license->titles, "en");
824     g_assert (title_en != NULL);
825     idx_title = string_table_add_string (string_table, title_en);
826     g_assert (idx_title <= G_MAXINT16);
827 
828     if (license->descriptions != NULL) {
829       desc_en = g_hash_table_lookup (license->descriptions, "en");
830       g_assert (desc_en != NULL);
831       idx_desc = string_table_add_string (string_table, desc_en);
832       g_assert (idx_desc <= G_MAXINT16);
833     } else {
834       idx_desc = -1;
835     }
836 
837     /* output comments with license refs covered by the next stanza */
838     if (license->derived != NULL) {
839       GList *d;
840 
841       g_print ("  /* %2d %s\n", idx, license->ref);
842 
843       for (d = license->derived; d != NULL; d = d->next) {
844         License *derived_license = d->data;
845 
846         g_print ("   * %2d %s%s\n", idx, derived_license->ref,
847             (d->next == NULL) ? " */" : "");
848       }
849     } else {
850       g_print ("  /* %2d %s */\n", idx, license->ref);
851     }
852     /* output essential data */
853     {
854       gchar *ref;
855 
856       ref =
857           g_strdup (license->ref +
858           strlen ("http://creativecommons.org/licenses/"));
859 
860       /* remove jurisdiction suffix from ref if this is non-generic, since
861        * the suffix is already contained in the jurisdiction flags */
862       if (license->jurisdiction_suffix != NULL) {
863         gsize suffix_len = strlen (license->jurisdiction_suffix);
864         gchar *cutoff;
865 
866         cutoff = ref + strlen (ref) - suffix_len;
867         g_assert (!strncmp (cutoff, license->jurisdiction_suffix, suffix_len));
868         g_assert (cutoff[suffix_len - 1] == '/');
869         g_assert (cutoff[suffix_len] == '\0');
870         *cutoff = '\0';
871       }
872 
873       g_print ("  { 0x%016" G_GINT64_MODIFIER "x, 0x%08x, \"%s\", %d, %d }%s\n",
874           license->jurisdiction, license->flags, ref, idx_title, idx_desc,
875           (l->next != NULL) ? "," : "");
876 
877       g_free (ref);
878     }
879     ++idx;
880   }
881   g_print ("};\n");
882 
883   g_print ("\nstatic const gchar license_strings[] =\n");
884   string_table_print (string_table);
885   string_table_free (string_table);
886   string_table = NULL;
887 
888   if (translation_dict_fn != NULL) {
889     write_translations_dictionary (licenses, translation_dict_fn);
890   }
891 
892   return 0;
893 }
894