1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4
5import codecs
6import collections
7import re
8
9"""
This script can generate a dictionary of language names.
11This dictionary looks as follows:
12
13language_names = {
14    "C": {
15        "nl": "Dutch",
16        "de": "German",
17        "en": "English",
18    },
19    "nl": {
20        "nl": "Nederlands",
21        "de": "Duits",
22        "en": "Engels",
23    },
24}
25
26Etcetera.
27
28It can be created from:
29
30- the 'all_languages' file that is part of KDE (currently the only option).
31
32
33This generate.py script writes the dictionary to a file named
34data.py.
35
This script does not need to be installed in order to use the language_names package.
37
38"""
39
# Display languages to extract: write_dict() emits one top-level entry in
# data.py for each code listed here that is present in the parsed data.
# Leave the list empty (lang_names = []) to extract every language found.
# "C" must be listed.
lang_names = [
    "C",
    "en",
    "de",
    "fr",
    "es",
    "nl",
    "pl",
    "pt_BR",
    "cs",
    "ru",
    "hu",
    "gl",
    "it",
    "tr",
    "uk",
    "ja",
    "zh_CN",
    "zh_HK",
    "zh_TW",
]
49
50
def generate_kde(fileName="/usr/share/locale/all_languages"):
    """Build the language-name table from KDE's 'all_languages' file.

    The file is read as UTF-8.  A line starting with ``[code]`` opens the
    section for language ``code``.  Inside a section, ``Name=...`` is stored
    under the "C" key and ``Name[xx]=...`` under the ``xx`` key, each mapping
    the section's language code to the name given on that line.  The
    ``x-test`` section is skipped, as are lines that appear before the first
    section header.

    Returns a plain dict of the form {display language: {code: name}}, with
    a few hard-coded corrections applied on top of the parsed data.

    """
    section_re = re.compile(r"\[([^]]+)\]")
    name_re = re.compile(r"Name(?:\[([^]]+)\])?\s*=(.*)$")

    table = collections.defaultdict(dict)
    current = None
    with codecs.open(fileName, "r", "utf-8") as source:
        for raw in source:
            text = raw.strip()

            header = section_re.match(text)
            if header:
                # A new section starts; later Name lines belong to it.
                current = header.group(1)
                continue
            if not current or current == 'x-test':
                continue

            entry = name_re.match(text)
            if entry:
                # A Name line without a [xx] suffix belongs to the "C" table.
                display = entry.group(1) or "C"
                table[display][current] = entry.group(2)

    # Corrections for mistakes in the KDE data.
    table["cs"]["gl"] = "Galicijský"
    table["zh_HK"]["gl"] = "加利西亞語"
    table["zh_HK"]["zh_HK"] = "繁體中文(香港)"
    return dict(table)
77
78
def makestring(text):
    """Return text as a double-quoted Python string literal.

    Backslashes and double quotes in text are escaped with a backslash; all
    other characters are copied unchanged.  The result is meant to be read
    back as Python source (expecting unicode_literals).

    """
    # Backslashes are doubled first so that the backslashes added in front of
    # quotes afterwards are not escaped a second time.
    escaped = text.replace('\\', '\\\\').replace('"', '\\"')
    return '"{0}"'.format(escaped)
82
83
def write_dict(langs):
    """Write the language-name table as Python source to 'data.py'.

    The file is opened by the relative name "data.py" and encoded as UTF-8.
    When the module-level lang_names list is non-empty, only the display
    languages named there that are also present in langs are written;
    otherwise every key of langs is.  Outer and inner keys are emitted in
    sorted order.

    """
    if lang_names:
        selected = [code for code in lang_names if code in langs]
    else:
        selected = list(langs)
    selected.sort()

    with codecs.open("data.py", "w", "utf-8") as output:
        emit = output.write

        # File header.
        emit("# -*- coding: utf-8;\n\n")
        emit("# Do not edit, this file is generated. See generate.py.\n")
        emit("\n\n")

        # One nested dict literal per selected display language.
        emit("language_names = {\n")
        for display in selected:
            names = langs[display]
            emit('{0}: {{\n'.format(makestring(display)))
            for code in sorted(names):
                emit(' {0}:{1},\n'.format(makestring(code), makestring(names[code])))
            emit('},\n')
        emit("}\n\n# End of data.py\n")
101
102
if __name__ == "__main__":
    # Parse the KDE data, register the zh_CN names under plain "zh" as well,
    # then write data.py.
    # NOTE(review): write_dict() only emits keys listed in lang_names when
    # that list is non-empty, and "zh" is not listed there, so this alias
    # appears to be dropped from the output -- confirm whether intended.
    names = generate_kde()
    names.update(zh=names['zh_CN'])
    write_dict(names)
107