1 /* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil; -*- */
2 /* vim:set et sts=4: */
3 /* ibus - The Input Bus
4 * Copyright (C) 2018 Takao Fujiwara <takao.fujiwara1@gmail.com>
5 * Copyright (C) 2018 Red Hat, Inc.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
20 * USA
21 */
22
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif
26
27 #include <glib.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #ifdef HAVE_LOCALE_H
32 #include <locale.h>
33 #endif
34
35 #include "ibusunicode.h"
36
/* Prefix expected in the second tab-separated field of the "@@@" header line
 * of the names list; the text after it is kept as the Unicode version. */
#define NAMES_LIST_SUBJECT "The Unicode Standard"
/* Prefix searched for in "#" comment lines of the blocks file in order to
 * pick up the Unicode version. */
#define BLOCKS_SUBJECT "Blocks-"

/* This file has 21 lines about the license at the top of the file.
 * ucd_block_translatable_save() copies that many leading lines of this
 * source file into the file it generates. */
#define LICENSE_LINES 21
42
/* Kind of input file handed to ucd_parse_file(); selects the per-line
 * parser that is applied to the file content. */
typedef enum
{
    UCD_NAMES_LIST = 0, /* lines go through ucd_names_list_parse_line() */
    UCD_BLOCKS          /* lines go through ucd_blocks_parse_line() */
} UCDType;
48
49 typedef struct _UnicodeData UnicodeData;
50 typedef struct _UnicodeDataIndex UnicodeDataIndex;
51
52 struct _UnicodeData{
53 gunichar code;
54 gchar *name;
55 gchar *alias;
56 gunichar start;
57 gunichar end;
58 GSList *list;
59 };
60
61 struct _UnicodeDataIndex {
62 gchar *index;
63 UnicodeData *data_list;
64 };
65
66 static gchar *unicode_version;
67
68 static void
unicode_data_new_object(UnicodeData * data)69 unicode_data_new_object (UnicodeData *data)
70 {
71 g_return_if_fail (data != NULL);
72 if (!data->name) {
73 g_warning ("No name in U+%04X", data->code);
74 }
75 IBusUnicodeData *unicode =
76 ibus_unicode_data_new ("code",
77 data->code,
78 "name",
79 data->name ? g_strdup (data->name)
80 : g_strdup (""),
81 "alias",
82 data->alias ? g_strdup (data->alias)
83 : g_strdup (""),
84 NULL);
85 data->list = g_slist_append (data->list, unicode);
86 }
87
88 static void
unicode_block_new_object(UnicodeData * data)89 unicode_block_new_object (UnicodeData *data)
90 {
91 g_return_if_fail (data != NULL);
92 if (!data->name) {
93 g_warning ("No name in U+%04X", data->start);
94 }
95 IBusUnicodeBlock *block =
96 ibus_unicode_block_new ("start",
97 data->start,
98 "end",
99 data->end,
100 "name",
101 data->name ? g_strdup (data->name)
102 : g_strdup (""),
103 NULL);
104 data->list = g_slist_append (data->list, block);
105 }
106
107 static void
unicode_data_reset(UnicodeData * data)108 unicode_data_reset (UnicodeData *data)
109 {
110 g_return_if_fail (data != NULL);
111 data->code = 0;
112 g_clear_pointer (&data->name, g_free);
113 g_clear_pointer (&data->alias, g_free);
114 data->start = 0;
115 data->end = 0;
116 }
117
118 static gboolean
ucd_names_list_parse_comment(const gchar * line)119 ucd_names_list_parse_comment (const gchar *line)
120 {
121 static gboolean has_version = FALSE;
122
123 if (has_version)
124 return TRUE;
125 if (strlen (line) > 4 && strncmp (line, "@@@", 3) == 0) {
126 gchar **elements = g_strsplit (line, "\t", -1);
127 if (strncmp (elements[1], NAMES_LIST_SUBJECT,
128 strlen (NAMES_LIST_SUBJECT)) == 0) {
129 unicode_version =
130 g_strdup (elements[1] + strlen (NAMES_LIST_SUBJECT) + 1);
131 has_version = TRUE;
132 }
133 g_strfreev (elements);
134 }
135 return TRUE;
136 }
137
138 static gboolean
ucd_names_list_parse_alias(const gchar * line,UnicodeData * data)139 ucd_names_list_parse_alias (const gchar *line,
140 UnicodeData *data)
141 {
142 g_return_val_if_fail (line != NULL, FALSE);
143 g_return_val_if_fail (data != NULL, FALSE);
144
145 if (*line == '\0')
146 return FALSE;
147 data->alias = g_strdup (line);
148 return TRUE;
149 }
150
151 static gboolean
ucd_names_list_parse_indent_line(const gchar * line,UnicodeData * data)152 ucd_names_list_parse_indent_line (const gchar *line,
153 UnicodeData *data)
154 {
155 g_return_val_if_fail (line != NULL, FALSE);
156
157 switch (*line) {
158 case '\0':
159 return FALSE;
160 case '=':
161 line++;
162 while (*line == ' ') line++;
163 return ucd_names_list_parse_alias (line, data);
164 default:;
165 }
166 return TRUE;
167 }
168
169 static gboolean
ucd_names_list_parse_line(const gchar * line,UnicodeData * data)170 ucd_names_list_parse_line (const gchar *line,
171 UnicodeData *data)
172 {
173 g_return_val_if_fail (line != NULL, FALSE);
174
175 switch (*line) {
176 case '\0':
177 return TRUE;
178 case ';':
179 return TRUE;
180 case '@':
181 return ucd_names_list_parse_comment (line);
182 case '\t':
183 return ucd_names_list_parse_indent_line (line + 1, data);
184 default:;
185 }
186 if (g_ascii_isxdigit (*line)) {
187 gchar **elements = g_strsplit (line, "\t", -1);
188 gunichar code;
189 gchar *name;
190
191 if (g_strv_length (elements) < 2) {
192 g_strfreev (elements);
193 return FALSE;
194 }
195 code = g_ascii_strtoull (elements[0], NULL, 16);
196 name = g_strdup (elements[1]);
197 if (data->name) {
198 unicode_data_new_object (data);
199 unicode_data_reset (data);
200 }
201 data->code = code;
202 data->name = name;
203 }
204 return TRUE;
205 }
206
207 static gboolean
ucd_blocks_parse_comment(const gchar * line)208 ucd_blocks_parse_comment (const gchar *line)
209 {
210 static gboolean has_version = FALSE;
211
212 g_return_val_if_fail (line != NULL, FALSE);
213
214 if (has_version)
215 return TRUE;
216 while (*line == ' ') line++;
217 if (strlen (line) > strlen (BLOCKS_SUBJECT) &&
218 strncmp (line, BLOCKS_SUBJECT, strlen (BLOCKS_SUBJECT)) == 0) {
219 unicode_version = g_strdup (line + strlen (BLOCKS_SUBJECT) + 1);
220 has_version = TRUE;
221 }
222 return TRUE;
223 }
224
225 static gboolean
ucd_blocks_parse_line(const gchar * line,UnicodeData * data)226 ucd_blocks_parse_line (const gchar *line,
227 UnicodeData *data)
228 {
229 g_return_val_if_fail (line != NULL, FALSE);
230
231 switch (*line) {
232 case '\0':
233 return TRUE;
234 case '#':
235 return ucd_blocks_parse_comment (line + 1);
236 default:;
237 }
238 if (g_ascii_isxdigit (*line)) {
239 gchar *endptr = NULL;
240 gunichar start = g_ascii_strtoull (line, &endptr, 16);
241 gunichar end;
242 gchar *name = NULL;
243
244 if (endptr == NULL || *endptr == '\0')
245 return FALSE;
246 while (*endptr == '.') endptr++;
247 line = endptr;
248 endptr = NULL;
249 end = g_ascii_strtoull (line, &endptr, 16);
250 if (endptr == NULL || *endptr == '\0')
251 return FALSE;
252 while (*endptr == ';') endptr++;
253 while (*endptr == ' ') endptr++;
254 if (*endptr == '\0')
255 return FALSE;
256 name = g_strdup (endptr);
257 if (data->name) {
258 unicode_block_new_object (data);
259 unicode_data_reset (data);
260 }
261 data->start = start;
262 data->end = end;
263 data->name = name;
264 }
265 return TRUE;
266 }
267
268 static gboolean
ucd_parse_file(const gchar * filename,GSList ** list,UCDType type)269 ucd_parse_file (const gchar *filename,
270 GSList **list,
271 UCDType type)
272 {
273 UnicodeData data = { 0, };
274 gchar *content = NULL;
275 gsize length = 0;
276 GError *error = NULL;
277 gchar *head, *end, *line;
278 int n = 1;
279
280 g_return_val_if_fail (filename != NULL, FALSE);
281 g_return_val_if_fail (list != NULL, FALSE);
282
283 if (!g_file_get_contents (filename, &content, &length, &error)) {
284 g_warning ("Failed to load %s: %s",
285 filename, error ? error->message : "");
286 goto failed_to_parse_ucd_names_list;
287 }
288 head = end = content;
289 while (*end == '\n' && end - content < length) {
290 end++;
291 n++;
292 }
293 head = end;
294 while (end - content < length) {
295 while (*end != '\n' && end - content < length)
296 end++;
297 if (end - content >= length)
298 break;
299 line = g_strndup (head, end - head);
300 switch (type) {
301 case UCD_NAMES_LIST:
302 if (!ucd_names_list_parse_line (line, &data)) {
303 g_warning ("parse error #%d in %s version %s: %s",
304 n, filename,
305 unicode_version ? unicode_version : "(null)",
306 line);
307 }
308 break;
309 case UCD_BLOCKS:
310 if (!ucd_blocks_parse_line (line, &data)) {
311 g_warning ("parse error #%d in %s version %s: %s",
312 n, filename,
313 unicode_version ? unicode_version : "(null)",
314 line);
315 }
316 break;
317 default:
318 abort ();
319 }
320 while (*end == '\n' && end - content < length) {
321 end++;
322 n++;
323 }
324 g_free (line);
325 head = end;
326 }
327 if (data.name != NULL) {
328 switch (type) {
329 case UCD_NAMES_LIST:
330 unicode_data_new_object (&data);
331 break;
332 case UCD_BLOCKS:
333 unicode_block_new_object (&data);
334 break;
335 default:;
336 }
337 unicode_data_reset (&data);
338 }
339 g_free (content);
340 *list = data.list;
341 return TRUE;
342
343 failed_to_parse_ucd_names_list:
344 if (error)
345 g_error_free (error);
346 g_clear_pointer (&content, g_free);
347 *list = data.list;
348 return FALSE;
349 }
350
351 static void
block_list_dump(IBusUnicodeBlock * block,GString * buff)352 block_list_dump (IBusUnicodeBlock *block,
353 GString *buff)
354 {
355 g_return_if_fail (buff != NULL);
356
357 g_string_append (buff, " /* TRANSLATORS: You might refer the " \
358 "translations from gucharmap with\n" \
359 " the following command:\n" \
360 " msgmerge -C gucharmap.po ibus.po " \
361 "ibus.pot */\n");
362 gchar *line = g_strdup_printf (" N_(\"%s\"),\n",
363 ibus_unicode_block_get_name (block));
364 g_string_append (buff, line);
365 }
366
367 static void
ucd_block_translatable_save(const gchar * filename,GSList * blocks_list)368 ucd_block_translatable_save (const gchar *filename,
369 GSList *blocks_list)
370 {
371 gchar *content = NULL;
372 gsize length = 0;
373 GError *error = NULL;
374 gchar *p;
375 GString *buff = NULL;
376 int i;
377 GSList *list = blocks_list;
378
379 g_return_if_fail (filename != NULL);
380 g_return_if_fail (list != NULL);
381
382 if (!g_file_get_contents (__FILE__, &content, &length, &error)) {
383 g_warning ("Failed to load %s: %s", __FILE__, error->message);
384 g_clear_pointer (&error, g_error_free);
385 return;
386 }
387
388 buff = g_string_new (NULL);
389 p = content;
390 for (i = 0; i < LICENSE_LINES; i++, p++) {
391 if ((p = strchr (p, '\n')) == NULL)
392 break;
393 }
394 if (p != NULL) {
395 g_string_append (buff, g_strndup (content, p - content));
396 g_string_append_c (buff, '\n');
397 }
398 g_clear_pointer (&content, g_free);
399
400 g_string_append (buff, g_strdup ("\n"));
401 g_string_append (buff, g_strdup_printf ("/* This file is generated by %s. */", __FILE__));
402 g_string_append (buff, g_strdup ("\n"));
403 g_string_append (buff, g_strdup ("include <glib/gi18n.h>\n"));
404 g_string_append (buff, g_strdup ("\n"));
405 g_string_append (buff, g_strdup ("#ifndef __IBUS_UNICODE_GEN_H_\n"));
406 g_string_append (buff, g_strdup ("#define __IBUS_UNICODE_GEN_H_\n"));
407 g_string_append (buff, g_strdup ("const static char *unicode_blocks[] = {\n"));
408 g_slist_foreach (list, (GFunc)block_list_dump, buff);
409 g_string_append (buff, g_strdup ("};\n"));
410 g_string_append (buff, g_strdup ("#endif\n"));
411
412 if (!g_file_set_contents (filename, buff->str, -1, &error)) {
413 g_warning ("Failed to save emoji category file %s: %s", filename, error->message);
414 g_error_free (error);
415 }
416
417 g_string_free (buff, TRUE);
418 }
419
420 int
main(int argc,char * argv[])421 main (int argc, char *argv[])
422 {
423 gchar *prgname;
424 gchar *input_names_list = NULL;
425 gchar *input_blocks = NULL;
426 gchar *output_names_list = NULL;
427 gchar *output_blocks = NULL;
428 gchar *output_blocks_trans = NULL;
429 GOptionEntry entries[] = {
430 { "input-names-list", 'n', 0, G_OPTION_ARG_STRING, &input_names_list,
431 "Parse NamesList.txt FILE in unicode.org ",
432 "FILE"
433 },
434 { "input-blocks", 'b', 0, G_OPTION_ARG_STRING, &input_blocks,
435 "Parse Blocks.txt FILE in unicode.org ",
436 "FILE"
437 },
438 { "output-names-list", 'o', 0, G_OPTION_ARG_STRING, &output_names_list,
439 "Save the Unicode data as FILE",
440 "FILE"
441 },
442 { "output-blocks", 'B', 0, G_OPTION_ARG_STRING, &output_blocks,
443 "Save the Unicode block list as FILE",
444 "FILE"
445 },
446 { "output-blocks-trans", 'C', 0, G_OPTION_ARG_STRING,
447 &output_blocks_trans,
448 "Save the translatable Unicode blocks as FILE",
449 "FILE"
450 },
451 { NULL }
452 };
453 GOptionContext *context;
454 GError *error = NULL;
455 GSList *names_list = NULL;
456 GSList *blocks_list = NULL;
457
458 #ifdef HAVE_LOCALE_H
459 /* To output emoji warnings. */
460 setlocale (LC_ALL, "");
461 #endif
462
463 prgname = g_path_get_basename (argv[0]);
464 g_set_prgname (prgname);
465 g_free (prgname);
466
467 context = g_option_context_new (NULL);
468 g_option_context_add_main_entries (context, entries, NULL);
469
470 if (argc < 3) {
471 g_print ("%s", g_option_context_get_help (context, TRUE, NULL));
472 g_option_context_free (context);
473 return -1;
474 }
475
476 if (!g_option_context_parse (context, &argc, &argv, &error)) {
477 g_warning ("Failed options: %s", error->message);
478 g_error_free (error);
479 return -1;
480 }
481 g_option_context_free (context);
482
483 if (input_names_list) {
484 ucd_parse_file (input_names_list, &names_list, UCD_NAMES_LIST);
485 g_free (input_names_list);
486 }
487 if (output_names_list && names_list)
488 ibus_unicode_data_save (output_names_list, names_list);
489 g_free (output_names_list);
490
491 if (input_blocks) {
492 ucd_parse_file (input_blocks, &blocks_list, UCD_BLOCKS);
493 g_free (input_blocks);
494 }
495 if (output_blocks && blocks_list)
496 ibus_unicode_block_save (output_blocks, blocks_list);
497 if (output_blocks_trans && blocks_list)
498 ucd_block_translatable_save (output_blocks_trans, blocks_list);
499 g_free (output_blocks);
500
501 g_free (unicode_version);
502 return 0;
503 }
504