1 /* GStreamer Language Tag Utility Functions
2 * Copyright (C) 2009 Tim-Philipp Müller <tim centricular net>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
13 *
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
18 */
19
20 /* mklangtables.c:
21 * little program that reads iso_639.xml and outputs tables for us as fallback
22 * for when iso-codes are not available or we fail to read the file for some
23 * reason, and so we don't have to parse the xml file just to map codes.
24 */
25
26 #ifdef HAVE_CONFIG_H
27 #include "config.h"
28 #endif
29
30 #include <glib.h>
31 #include <string.h>
32
33 #define ISO_639_XML_PATH ISO_CODES_PREFIX "/share/xml/iso-codes/iso_639.xml"
34
35 typedef struct
36 {
37 gchar code_1[3]; /* de */
38 gchar code_2t[4]; /* deu */
39 gchar code_2b[4]; /* ger */
40 const gchar *name; /* German */
41 guint name_offset; /* offset into string table */
42 } IsoLang;
43
44 static GArray *languages = NULL;
45
46 static void
dump_languages(void)47 dump_languages (void)
48 {
49 GString *names;
50 const char *s;
51 int i, num_escaped;
52
53 g_assert (languages != NULL);
54
55 names = g_string_new ("");
56
57 g_print ("/* generated by mklangtables from iso-codes " ISO_CODES_VERSION
58 " */\n");
59 g_print ("/* License: LGPL-2.1+ */\n");
60 g_print ("\n");
61 g_print ("#include <glib.h>\n");
62 g_print ("\n");
63 g_print ("#define ISO_639_FLAG_2T (1 << 0)\n");
64 g_print ("#define ISO_639_FLAG_2B (1 << 1)\n");
65 g_print ("\n");
66 g_print ("/* *INDENT-OFF* */\n");
67 g_print ("\n");
68 g_print ("static const struct\n");
69 g_print ("{\n");
70 g_print (" const gchar iso_639_1[3];\n");
71 g_print (" const gchar iso_639_2[4];\n");
72 g_print (" guint8 flags;\n");
73 g_print (" guint16 name_offset;\n");
74 g_print ("} iso_639_codes[] = {\n");
75
76 for (i = 0, num_escaped = 0; i < languages->len; ++i) {
77 IsoLang *lang = &g_array_index (languages, IsoLang, i);
78
79 /* For now just print those where there's both a ISO-639-1 and -2 code */
80 if (lang->code_1[0] == '\0')
81 continue;
82
83 /* save current offset */
84 lang->name_offset = names->len;
85
86 /* adjust for fact that \000 is 4 chars now but will take up only 1 later */
87 lang->name_offset -= num_escaped * 3;
88
89 /* append one char at a time, making sure to escape UTF-8 characters */
90 for (s = lang->name; s != NULL && *s != '\0'; ++s) {
91 if (g_ascii_isprint (*s) && *s != '"' && *s != '\\') {
92 g_string_append_c (names, *s);
93 } else {
94 g_string_append_printf (names, "\\%03o", (unsigned char) *s);
95 ++num_escaped;
96 }
97 }
98 g_string_append (names, "\\000");
99 ++num_escaped;
100
101 g_print (" /* %s */\n", lang->name);
102 if (strcmp (lang->code_2b, lang->code_2t) == 0) {
103 g_print (" { \"%s\", \"%s\", ISO_639_FLAG_2T | ISO_639_FLAG_2B, %u },\n",
104 lang->code_1, lang->code_2t, lang->name_offset);
105 } else {
106 /* if 639-2T and 639-2B differ, put 639-2T first */
107 g_print (" { \"%s\", \"%s\", ISO_639_FLAG_2T, %u },\n",
108 lang->code_1, lang->code_2t, lang->name_offset);
109 g_print (" { \"%s\", \"%s\", ISO_639_FLAG_2B, %u },\n",
110 lang->code_1, lang->code_2b, lang->name_offset);
111 }
112 }
113
114 g_print ("};\n");
115 g_print ("\n");
116 g_print ("static const gchar iso_639_names[] =\n");
117 s = names->str;
118 while (s != NULL && *s != '\0') {
119 gchar line[74], *lastesc;
120 guint left;
121
122 left = strlen (s);
123 g_strlcpy (line, s, MIN (left, sizeof (line)));
124 s += sizeof (line) - 1;
125 /* avoid partial escaped codes at the end of a line */
126 if ((lastesc = strrchr (line, '\\')) && strlen (lastesc) < 4) {
127 s -= strlen (lastesc);
128 *lastesc = '\0';
129 }
130 g_print (" \"%s\"", line);
131 if (left < 74)
132 break;
133 g_print ("\n");
134 }
135 g_print (";\n");
136 g_print ("\n");
137 g_print ("/* *INDENT-ON* */\n");
138
139 g_string_free (names, TRUE);
140 }
141
142 static gboolean
copy_attribute(gchar * dest,guint dest_len,const gchar ** attr_names,const gchar ** attr_vals,const gchar * needle)143 copy_attribute (gchar * dest, guint dest_len, const gchar ** attr_names,
144 const gchar ** attr_vals, const gchar * needle)
145 {
146 while (attr_names != NULL && *attr_names != NULL) {
147 if (strcmp (*attr_names, needle) == 0) {
148 g_strlcpy (dest, *attr_vals, dest_len);
149 return TRUE;
150 }
151 ++attr_names;
152 ++attr_vals;
153 }
154 dest[0] = '\0';
155 return FALSE;
156 }
157
158 static void
xml_start_element(GMarkupParseContext * ctx,const gchar * element_name,const gchar ** attr_names,const gchar ** attr_vals,gpointer user_data,GError ** error)159 xml_start_element (GMarkupParseContext * ctx, const gchar * element_name,
160 const gchar ** attr_names, const gchar ** attr_vals,
161 gpointer user_data, GError ** error)
162 {
163 gchar name[256];
164 IsoLang lang;
165
166 if (strcmp (element_name, "iso_639_entry") != 0)
167 return;
168
169 copy_attribute (lang.code_1, 3, attr_names, attr_vals, "iso_639_1_code");
170 copy_attribute (lang.code_2t, 4, attr_names, attr_vals, "iso_639_2T_code");
171 copy_attribute (lang.code_2b, 4, attr_names, attr_vals, "iso_639_2B_code");
172
173 copy_attribute (name, sizeof (name), attr_names, attr_vals, "name");
174 lang.name = g_intern_string (name);
175
176 g_array_append_val (languages, lang);
177 }
178
179 static void
parse_iso_639_xml(const gchar * data,gsize len)180 parse_iso_639_xml (const gchar * data, gsize len)
181 {
182 GMarkupParser xml_parser = { xml_start_element, NULL, NULL, NULL, NULL };
183 GMarkupParseContext *ctx;
184 GError *err = NULL;
185
186 g_return_if_fail (g_utf8_validate (data, len, NULL));
187
188 ctx = g_markup_parse_context_new (&xml_parser, 0, NULL, NULL);
189 if (!g_markup_parse_context_parse (ctx, data, len, &err))
190 g_error ("Parsing failed: %s", err->message);
191
192 g_markup_parse_context_free (ctx);
193 }
194
195 static gint
languages_sort_func(IsoLang * l1,IsoLang * l2)196 languages_sort_func (IsoLang * l1, IsoLang * l2)
197 {
198 if (l1 == l2)
199 return 0;
200
201 if (l1->code_1[0] == '\0' && l2->code_1[0] != '\0')
202 return -1;
203
204 return strcmp (l1->code_1, l2->code_1);
205 }
206
207 int
main(int argc,char ** argv)208 main (int argc, char **argv)
209 {
210 GMappedFile *f;
211 gchar *xml_data;
212 gsize xml_len;
213
214 f = g_mapped_file_new (ISO_639_XML_PATH, FALSE, NULL);
215 if (f != NULL) {
216 xml_data = (gchar *) g_mapped_file_get_contents (f);
217 xml_len = g_mapped_file_get_length (f);
218 } else {
219 GError *err = NULL;
220
221 if (!g_file_get_contents (ISO_639_XML_PATH, &xml_data, &xml_len, &err))
222 g_error ("Could not read %s: %s", ISO_639_XML_PATH, err->message);
223 }
224
225 languages = g_array_new (FALSE, TRUE, sizeof (IsoLang));
226
227 parse_iso_639_xml (xml_data, xml_len);
228
229 g_array_sort (languages, (GCompareFunc) languages_sort_func);
230
231 dump_languages ();
232
233 g_array_free (languages, TRUE);
234
235 if (f != NULL)
236 g_mapped_file_unref (f);
237 else
238 g_free (xml_data);
239
240 return 0;
241 }
242