1#
2# Copyright 2007 Zuza Software Foundation
3#
4# This file is part of translate.
5#
6# translate is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 2 of the License, or
9# (at your option) any later version.
10#
11# translate is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program; if not, see <http://www.gnu.org/licenses/>.
18
19"""This module represents the Afrikaans language.
20
21.. seealso:: http://en.wikipedia.org/wiki/Afrikaans_language
22"""
23
24
25import re
26
27from translate.lang import common
28
29
# Matches the Afrikaans indefinite article "'n": an apostrophe followed by
# "n", with a word boundary directly after the "n".
articlere = re.compile(r"'n\b")
31
32
class af(common.Common):
    """Language class for Afrikaans, built on :class:`common.Common`."""

    validdoublewords = [""]

    # Common punctuation, quotes and miscellaneous punctuation of the base
    # class, concatenated in that order.
    punctuation = (
        common.Common.commonpunc + common.Common.quotes + common.Common.miscpunc
    )

    sentenceend = ".!?…"

    # A sentence is the shortest run of text ending in one of the
    # ``sentenceend`` characters plus whitespace, provided that what follows
    # is "'n" + whitespace + a capital A-Z, or a character that is not an
    # apostrophe, lowercase a-z or digit, or an apostrophe not followed by "n".
    sentencere = re.compile(
        r"""
        (?s)        # make . also match newlines
        .*?         # anything, but match non-greedy
        [%s]        # the puntuation for sentence ending
        \s+         # the spacing after the puntuation
        (?='n\s[A-Z]|[^'a-z\d]|'[^n])
        # lookahead that next part starts with caps or 'n followed by caps
        """
        % sentenceend,
        re.VERBOSE,
    )

    specialchars = "ëïêôûáéíóúý"

    @classmethod
    def capsstart(cls, text):
        """Run the base ``capsstart`` check, skipping a leading article ('n).

        When "'n" occurs within the first 20 characters and the text, once
        leading whitespace and non-apostrophe punctuation are removed,
        begins with that article, the result of
        ``common.Common.capsstart`` for the text after the article is
        returned.  In every other case the result for the unmodified
        ``text`` is returned.
        """
        # Cheap pre-check: only look closer if the article is near the start.
        if articlere.search(text, 0, 20) is None:
            return common.Common.capsstart(text)

        # Every punctuation character except the apostrophe, which must be
        # kept because it is part of the article itself.
        strippable = cls.punctuation.replace("'", "")
        remainder = text.lstrip().lstrip(strippable)

        article = articlere.match(remainder)
        if article is None:
            return common.Common.capsstart(text)
        return common.Common.capsstart(remainder[article.end() :])
69
70
# Each row holds the uppercase and the lowercase form of one Cyrillic letter
# (or letter pair) with its Latin replacement.  The "rule"/"note" numbers in
# the comments refer to transliteration rules that are not reproduced in
# this file.
# fmt: off
cyr2lat = {
    "А": "A", "а": "a",
    "Б": "B", "б": "b",
    "В": "W", "в": "w",          # differs at the end of a syllable, see rule 2
    "Г": "G", "г": "g",          # see rules 3 and 4
    "Д": "D", "д": "d",
    "ДЖ": "Dj", "дж": "dj",
    "Е": "Je", "е": "je",        # sometimes plain "e"; when/why: see rule 5
    "Ё": "Jo", "ё": "jo",        # see rule 6
    "ЕЙ": "Ei", "ей": "ei",
    "Ж": "Zj", "ж": "zj",
    "З": "Z", "з": "z",
    "И": "I", "и": "i",
    "Й": "J", "й": "j",          # see rules 9 and 10
    "К": "K", "к": "k",          # see note 11
    "Л": "L", "л": "l",
    "М": "M", "м": "m",
    "Н": "N", "н": "n",
    "О": "O", "о": "o",
    "П": "P", "п": "p",
    "Р": "R", "р": "r",
    "С": "S", "с": "s",          # see note 12
    "Т": "T", "т": "t",
    "У": "Oe", "у": "oe",
    "Ф": "F", "ф": "f",
    "Х": "Ch", "х": "ch",        # see rule 12
    "Ц": "Ts", "ц": "ts",
    "Ч": "Tj", "ч": "tj",
    "Ш": "Sj", "ш": "sj",
    "Щ": "Sjtsj", "щ": "sjtsj",
    "Ы": "I", "ы": "i",          # see note 13
    "Ъ": "", "ъ": "",            # see note 14
    "Ь": "", "ь": "",            # not listed in the AWS; assumed to be dropped like the previous letter
    "Э": "E", "э": "e",
    "Ю": "Joe", "ю": "joe",
    "Я": "Ja", "я": "ja",
}
# fmt: on
"""Mapping of Cyrillic to Latin letters for transliteration in Afrikaans"""

# The lowercase Cyrillic vowel letters.
cyr_vowels = "аеёиоуыэюя"
146
147
def tranliterate_cyrillic(text):
    """Convert Cyrillic text to Latin using the ``cyr2lat`` table.

    The text is scanned from left to right.  At each position the longest
    key of ``cyr2lat`` that matches is replaced by its Latin value, so the
    multi-letter entries of the table (for example "дж" and "ей") take
    precedence over their single-letter parts.  Characters without a table
    entry are copied through unchanged.

    This is a plain table lookup: the context-dependent rules mentioned in
    the comments of ``cyr2lat`` are not applied, and keys are matched
    exactly as they are spelled in the table.

    :param text: the string to transliterate
    :return: the transliterated string
    """
    # Length of the longest key; at least 1 so that each step always
    # consumes a character, even if the table were empty.
    widest = max([1, *map(len, cyr2lat)])

    pieces = []
    pos = 0
    size = len(text)
    while pos < size:
        # Try the widest possible slice first, then progressively shorter.
        for width in range(min(widest, size - pos), 0, -1):
            chunk = text[pos : pos + width]
            if chunk in cyr2lat:
                break
        # Here ``chunk`` is either the longest matching key or, when nothing
        # matched, the single character at ``pos`` (``width`` is then 1).
        pieces.append(cyr2lat.get(chunk, chunk))
        pos += width
    return "".join(pieces)
154