1#
2# Copyright 2007, 2010 Zuza Software Foundation
3#
4# This file is part of translate.
5#
6# translate is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 2 of the License, or
9# (at your option) any later version.
10#
11# translate is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program; if not, see <http://www.gnu.org/licenses/>.
18
19"""This module represents the Persian language.
20
21.. seealso:: http://en.wikipedia.org/wiki/Persian_language
22"""
23
24
25import re
26
27from translate.lang import common
28
29
def guillemets(text):
    """Replace paired quotation marks in *text* with guillemets (« »).

    Three kinds of pairs are handled, in this order: straight double quotes,
    single quotes (either a backtick/apostrophe pair or two apostrophes) and
    curly double quotes.  Straight quotes are only converted when their count
    suggests that they are properly paired.  A quotation that directly
    follows ``=`` is left untouched, since it is probably an XML attribute
    value such as ``<a href="something">``.

    :param text: the string to convert
    :return: *text* with the recognised quotation pairs replaced
    """

    def convertquotation(match):
        prefix = match.group(1)
        # Let's see that we didn't perhaps match an XML tag property like
        # <a href="something">
        if prefix == "=":
            return match.group(0)
        return f"{prefix}«{match.group(2)}»"

    def substitute(pattern, string):
        # The prefix group "(.|^)" captures the character in front of the
        # opening quote.  "." never matches a newline, so without
        # re.MULTILINE a quotation that opens any line but the first could
        # not be matched at all; with the flag, "^" also matches right after
        # each newline.  "." is still tried first, so inputs that matched
        # before are converted exactly as before.
        return re.sub(pattern, convertquotation, string, flags=re.MULTILINE)

    # Check that there is an even number of double quotes, otherwise it is
    # probably not safe to convert them.
    if text.count('"') % 2 == 0:
        text = substitute(r'(.|^)"([^"]+)"', text)

    singlecount = text.count("'")
    if singlecount:
        if singlecount == text.count("`"):
            # As many backticks as apostrophes: treat them as `quoted' pairs.
            text = substitute(r"(.|^)`([^']+)'", text)
        elif singlecount % 2 == 0:
            text = substitute(r"(.|^)'([^']+)'", text)

    # Curly quotes have distinct opening and closing characters, so no
    # pairing check is needed.
    return substitute(r"(.|^)“([^”]+)”", text)
51
52
class fa(common.Common):
    """Language class for Persian."""

    # Arabic comma followed by a space.
    listseperator = "، "

    # Latin punctuation marks mapped to their Persian counterparts.
    # The mapping "%" -> "٪" is intentionally absent: it causes problems
    # with variables.
    puncdict = {",": "،", ";": "؛", "?": "؟"}

    # It seems that Persian uses both Arabic-Indic and Extended Arabic-Indic
    # digits, so every Western digit is listed twice: once per digit set.
    numbertuple = (
        # U+0660..U+0669: Arabic-Indic digits zero to nine.
        *((str(value), chr(0x0660 + value)) for value in range(10)),
        # U+06F0..U+06F9: Extended Arabic-Indic digits zero to nine.
        *((str(value), chr(0x06F0 + value)) for value in range(10)),
    )

    ignoretests = {"all": ["simplecaps", "startcaps"]}
    # TODO: check persian numerics
    # TODO: zwj and zwnj?

    @classmethod
    def punctranslate(cls, text):
        """Translate punctuation via the parent class, then apply guillemets."""
        return guillemets(super().punctranslate(text))
102