#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Generate a dictionary of language names.

The dictionary looks as follows:

language_names = {
    "C": {
        "nl": "Dutch",
        "de": "German",
        "en": "English",
    },
    "nl": {
        "nl": "Nederlands",
        "de": "Duits",
        "en": "Engels",
    },
}

Etcetera.

It can be created from:

- the 'all_languages' file that is part of KDE (currently the only option).


This generate.py script writes the dictionary to a file named
data.py.

This script does not need to be installed in order to use the
language_names package.

"""

import codecs
import collections
import re


# Name here the languages whose name tables should be written out.
# If the list is empty, all languages found are used. "C" must be named.
# (Use ``lang_names = []`` to export everything.)
lang_names = [
    "C", "en", "de", "fr", "es", "nl", "pl", "pt_BR",
    "cs", "ru", "hu", "gl", "it", "tr", "uk",
    "ja", "zh_CN", "zh_HK", "zh_TW",
]

# A "[group]" header line; the group is the language code being described.
_GROUP_RE = re.compile(r"\[([^]]+)\]")

# A "Name=..." or "Name[lang]=..." entry inside a group.
_NAME_RE = re.compile(r"Name(?:\[([^]]+)\])?\s*=(.*)$")

# Characters that must be backslash-escaped inside a double-quoted literal.
_ESCAPE_RE = re.compile(r'([\\"])')


def generate_kde(fileName="/usr/share/locale/all_languages"):
    """Uses the KDE file to extract language names.

    Every ``[group]`` header selects the language code being described; each
    following ``Name[lang]=text`` line gives the name of that language as
    written in ``lang``. A plain ``Name=text`` line is stored under ``"C"``.
    Entries of the ``x-test`` group are skipped.

    Returns a dictionary mapping ``lang`` to a dictionary that maps each
    described language code to its name. All strings are in unicode form.

    """
    langs = collections.defaultdict(dict)

    group = None
    with codecs.open(fileName, "r", "utf-8") as langfile:
        for line in langfile:
            line = line.strip()
            header = _GROUP_RE.match(line)
            if header:
                group = header.group(1)
                continue
            if not group or group == 'x-test':
                continue
            entry = _NAME_RE.match(line)
            if entry:
                lang = entry.group(1) or "C"
                # Whitespace is accepted before '='; also drop it after '='
                # so that "Name = Foo" does not yield " Foo".
                langs[lang][group] = entry.group(2).strip()

    # correct KDE mistake
    # (these entries are set even when the file does not mention them)
    langs["cs"]["gl"] = "Galicijský"
    langs["zh_HK"]["gl"] = "加利西亞語"
    langs["zh_HK"]["zh_HK"] = "繁體中文(香港)"
    return dict(langs)


def makestring(text):
    """Returns the text wrapped in quotes, usable as Python input (expecting unicode_literals).

    Backslashes and double quotes in the text are escaped with a backslash.

    """
    return '"' + _ESCAPE_RE.sub(r'\\\1', text) + '"'


def write_dict(langs, fileName="data.py"):
    """Writes the dictionary to a Python source file ('data.py' by default).

    ``langs`` is a dictionary as returned by generate_kde(). If the
    module-level ``lang_names`` list is non-empty, only the top-level keys
    named there (and present in ``langs``) are written; otherwise all keys
    are written. Keys are written in sorted order.

    """
    if lang_names:
        keys = sorted(k for k in lang_names if k in langs)
    else:
        keys = sorted(langs)

    with codecs.open(fileName, "w", "utf-8") as output:
        output.write("# -*- coding: utf-8;\n\n")
        output.write("# Do not edit, this file is generated. See generate.py.\n")
        output.write("\n\n")

        output.write("language_names = {\n")
        for key in keys:
            names = langs[key]
            output.write('{0}: {{\n'.format(makestring(key)))
            for lang in sorted(names):
                output.write(' {0}:{1},\n'.format(makestring(lang), makestring(names[lang])))
            output.write('},\n')
        output.write("}\n\n# End of data.py\n")


if __name__ == "__main__":
    langs = generate_kde()
    # NOTE(review): "zh" is not listed in lang_names, so while that list is
    # non-empty write_dict() filters this alias out again -- confirm whether
    # "zh" should be added to lang_names.
    langs['zh'] = langs['zh_CN']
    write_dict(langs)