1#
2# Copyright 2009 Zuza Software Foundation
3#
4# This file is part of translate.
5#
6# translate is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 2 of the License, or
9# (at your option) any later version.
10#
11# translate is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program; if not, see <http://www.gnu.org/licenses/>.
18
19"""Functions to manage Poedit's language features.
20
21.. note:: The ISO 639 maps are from Poedit's
   `isocodes.cpp
23   <https://github.com/vslavik/poedit/blob/v1.4.2/src/isocodes.cpp#L36-227>`_
24   (v1.4.2) to ensure that we match currently released versions of Poedit.
25"""
26
27
# Poedit language table: maps a language code (the key) to the English
# language name that Poedit uses for it (the value).  Nearly every key is a
# two-letter code; "fur" (Friulian) is the one three-letter key.
#
# The entries mirror Poedit's own table (see the module docstring), so codes
# and names are kept exactly as listed even where they look unusual -- do not
# "correct" them here.  ``lang_names`` is built by inverting this dict, so a
# duplicated name would silently drop one of its codes.
lang_codes = {
    "aa": "Afar",
    "ab": "Abkhazian",
    "ae": "Avestan",
    "af": "Afrikaans",
    "am": "Amharic",
    "ar": "Arabic",
    "as": "Assamese",
    "ay": "Aymara",
    "az": "Azerbaijani",
    "ba": "Bashkir",
    "be": "Belarusian",
    "bg": "Bulgarian",
    "bh": "Bihari",
    "bi": "Bislama",
    "bn": "Bengali",
    "bo": "Tibetan",
    "br": "Breton",
    "bs": "Bosnian",
    "ca": "Catalan",
    "ce": "Chechen",
    "ch": "Chamorro",
    "co": "Corsican",
    "cs": "Czech",
    "cu": "Church Slavic",
    "cv": "Chuvash",
    "cy": "Welsh",
    "da": "Danish",
    "de": "German",
    "dz": "Dzongkha",
    "el": "Greek",
    "en": "English",
    "eo": "Esperanto",
    "es": "Spanish",
    "et": "Estonian",
    "eu": "Basque",
    "fa": "Persian",
    "fi": "Finnish",
    "fj": "Fijian",
    "fo": "Faroese",
    "fr": "French",
    "fur": "Friulian",
    "fy": "Frisian",
    "ga": "Irish",
    "gd": "Gaelic",
    "gl": "Galician",
    "gn": "Guarani",
    "gu": "Gujarati",
    "ha": "Hausa",
    "he": "Hebrew",
    "hi": "Hindi",
    "ho": "Hiri Motu",
    "hr": "Croatian",
    "hu": "Hungarian",
    "hy": "Armenian",
    "hz": "Herero",
    "ia": "Interlingua",
    "id": "Indonesian",
    "ie": "Interlingue",
    "ik": "Inupiaq",
    "is": "Icelandic",
    "it": "Italian",
    "iu": "Inuktitut",
    "ja": "Japanese",
    "jw": "Javanese",
    "ka": "Georgian",
    "ki": "Kikuyu",
    "kj": "Kuanyama",
    "kk": "Kazakh",
    "kl": "Kalaallisut",
    "km": "Khmer",
    "kn": "Kannada",
    "ko": "Korean",
    "ks": "Kashmiri",
    "ku": "Kurdish",
    "kv": "Komi",
    "kw": "Cornish",
    "ky": "Kyrgyz",
    "la": "Latin",
    "lb": "Letzeburgesch",
    "ln": "Lingala",
    "lo": "Lao",
    "lt": "Lithuanian",
    "lv": "Latvian",
    "mg": "Malagasy",
    "mh": "Marshall",
    "mi": "Maori",
    "mk": "Macedonian",
    "ml": "Malayalam",
    "mn": "Mongolian",
    "mo": "Moldavian",
    "mr": "Marathi",
    "ms": "Malay",
    "mt": "Maltese",
    "my": "Burmese",
    "na": "Nauru",
    "ne": "Nepali",
    "ng": "Ndonga",
    "nl": "Dutch",
    "nn": "Norwegian Nynorsk",
    "nb": "Norwegian Bokmal",
    "nr": "Ndebele, South",
    "nv": "Navajo",
    "ny": "Chichewa; Nyanja",
    "oc": "Occitan",
    "om": "(Afan) Oromo",
    "or": "Oriya",
    "os": "Ossetian; Ossetic",
    "pa": "Panjabi",
    "pi": "Pali",
    "pl": "Polish",
    "ps": "Pashto, Pushto",
    "pt": "Portuguese",
    "qu": "Quechua",
    "rm": "Rhaeto-Romance",
    "rn": "Rundi",
    "ro": "Romanian",
    "ru": "Russian",
    "rw": "Kinyarwanda",
    "sa": "Sanskrit",
    "sc": "Sardinian",
    "sd": "Sindhi",
    "se": "Northern Sami",
    "sg": "Sangro",
    "sh": "Serbo-Croatian",
    "si": "Sinhalese",
    "sk": "Slovak",
    "sl": "Slovenian",
    "sm": "Samoan",
    "sn": "Shona",
    "so": "Somali",
    "sq": "Albanian",
    "sr": "Serbian",
    "ss": "Siswati",
    "st": "Sesotho",
    "su": "Sundanese",
    "sv": "Swedish",
    "sw": "Swahili",
    "ta": "Tamil",
    "te": "Telugu",
    "tg": "Tajik",
    "th": "Thai",
    "ti": "Tigrinya",
    "tk": "Turkmen",
    "tl": "Tagalog",
    "tn": "Setswana",
    "to": "Tonga",
    "tr": "Turkish",
    "ts": "Tsonga",
    "tt": "Tatar",
    "tw": "Twi",
    "ty": "Tahitian",
    "ug": "Uighur",
    "uk": "Ukrainian",
    "ur": "Urdu",
    "uz": "Uzbek",
    "vi": "Vietnamese",
    "vo": "Volapuk",
    "wa": "Walloon",
    "wo": "Wolof",
    "xh": "Xhosa",
    "yi": "Yiddish",
    "yo": "Yoruba",
    "za": "Zhuang",
    "zh": "Chinese",
    "zu": "Zulu",
}
"""ISO 639 codes and names as used by Poedit.
Mostly these are identical to ISO 639, but there are some differences."""
197
# Invert the code -> name table so a Poedit language name yields its code.
lang_names = {name: code for code, name in lang_codes.items()}
"""Reverse lookup for :data:`lang_codes`: language name -> language code."""
200
dialects = {
    "Portuguese": {
        "PORTUGAL": "pt",
        "BRAZIL": "pt_BR",
        "None": "pt",
    },
    # en_US is deliberately not given a subtype of its own.
    "English": {
        "UNITED KINGDOM": "en_GB",
        "SOUTH AFRICA": "en_ZA",
        "None": "en",
    },
    # zh_CN is Simplified Chinese, zh_TW is Traditional Chinese.
    "Chinese": {
        "CHINA": "zh_CN",
        "TAIWAN": "zh_TW",
        "None": "zh_CN",
    },
}
"""Per-language dialect codes, keyed by ISO 3166 country name.

The ``'None'`` entry of each language is the default fallback code."""
214
215
def isocode(language, country=None):
    """Translate a Poedit language name (and optional country) into a code.

    Poedit records the language and country as names in these PO header
    entries:

    - X-Poedit-Language
    - X-Poedit-Country

    A language listed in :data:`dialects` is resolved through its country
    table, which can produce an xx_YY style dialect code; a missing or
    unlisted country gets that language's ``'None'`` fallback. Any other
    language name is looked up in :data:`lang_names`.

    :param language: Language name
    :type language: String
    :param country: Country name
    :type country: String
    :return: ISO 639 language code, or ``None`` if the name is not known
    :rtype: String
    """
    country_codes = dialects.get(language)
    if not country_codes:
        # No dialect table for this language: plain name -> code lookup,
        # which yields None for an unrecognised name.
        return lang_names.get(language)
    # A missing or unlisted country maps to the language's default code.
    fallback = country_codes["None"]
    return country_codes.get(country, fallback)
239