1#
2# Copyright 2007 Zuza Software Foundation
3#
4# This file is part of translate.
5#
6# translate is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 2 of the License, or
9# (at your option) any later version.
10#
11# translate is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program; if not, see <http://www.gnu.org/licenses/>.
18
"""This module represents the Chinese language (both traditional and simplified).

.. seealso:: http://en.wikipedia.org/wiki/Chinese_language
"""
23
24
25import re
26
27from translate.lang import common
28
29
class zh(common.Common):
    """This class represents Chinese (both traditional and simplified).

    It defines the Chinese list separator, sentence-ending characters, a
    sentence-splitting regular expression, a table for converting ASCII
    punctuation to Chinese punctuation, a length estimate and the checks
    that should be ignored.
    """

    # Ideographic comma, used between the items of a list.
    # (The attribute name keeps its historical spelling; it is part of the
    # interface and must not be renamed here.)
    listseperator = "、"

    # Characters that can end a sentence: ideographic full stop, the
    # fullwidth "！" and "？", their ASCII counterparts, and the ellipsis.
    sentenceend = "。！？!?…"

    # Compared to common.py, we make the space after the sentence ending
    # optional and don't demand an uppercase letter to follow.
    sentencere = re.compile(
        r"""(?s) # make . also match newlines
                            .*?      # any text, but match non-greedy
                            [%s]     # the punctuation for sentence ending
                            \s*      # the optional space after the punctuation
                            """
        % sentenceend,
        re.VERBOSE,
    )

    # The following transformation rules should be mostly useful for all types
    # of Chinese. The comma (,) is not handled here, since it maps to two
    # different characters, depending on context.
    # If the comma separates sentences, it should be converted to a
    # fullwidth comma ("，"). If the comma separates list items,
    # like "apple, orange, grape, .....", "、" is used.
    #
    # Each key is an ASCII punctuation mark followed by a space or a newline.
    # The value is the Chinese (fullwidth) mark; the trailing space is dropped
    # but a trailing newline is always preserved.
    puncdict = {
        ". ": "。",
        "; ": "；",
        ": ": "：",
        "! ": "！",
        "? ": "？",
        ".\n": "。\n",
        ";\n": "；\n",
        ":\n": "：\n",
        "!\n": "！\n",
        "?\n": "？\n",
        "% ": "%",
    }

    @classmethod
    def length_difference(cls, length):
        """Return ``10 - length // 2`` for a string of ``length`` characters.

        NOTE(review): presumably this is the estimated change in length of a
        Chinese translation relative to a source string of ``length``
        characters -- confirm against ``common.Common.length_difference``.

        :param length: length of the source string (an integer).
        :return: an integer; it decreases as ``length`` grows.
        """
        # Floor division keeps the result an integer. True division ("/")
        # would return a float such as 7.5 on Python 3.
        return 10 - length // 2

    # Capitalisation-related checks listed under the "all" key.
    # NOTE(review): presumably these are skipped by the quality checker
    # because Chinese has no letter case -- confirm how ignoretests is used.
    ignoretests = {
        "all": ["simplecaps", "startcaps"],
    }
76