#
# Copyright 2007 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

"""This module represents the Chinese language (both traditional and simplified).

.. seealso:: http://en.wikipedia.org/wiki/Chinese_language
"""


import re

from translate.lang import common


class zh(common.Common):
    """This class represents Chinese."""

    # Ideographic comma (U+3001), used between the items of a list.
    # The attribute name keeps its historical spelling; it is part of the
    # class interface and must not be renamed here.
    listseperator = "、"

    # Characters that end a sentence.  They are interpolated verbatim into
    # the character class of ``sentencere`` below.
    # NOTE(review): "!?" appears twice and the replacement values in
    # ``puncdict`` are plain ASCII; Chinese text normally uses the fullwidth
    # forms (U+FF01, U+FF1F, U+FF1B, U+FF1A, U+FF05).  Confirm that the
    # fullwidth characters were not lost to a Unicode normalisation step.
    sentenceend = "。!?!?…"

    # Compared to common.py, we make the space after the sentence ending
    # optional and don't demand an uppercase letter to follow.
    sentencere = re.compile(
        r"""(?s)        # make . also match newlines
            .*?         # any text, but match non-greedy
            [%s]        # the puntuation for sentence ending
            \s*         # the optional space after the puntuation
        """
        % sentenceend,
        re.VERBOSE,
    )

    # The following transformation rules should be mostly useful for all types
    # of Chinese. The comma (,) is not handled here, since it maps to two
    # different characters, depending on context.
    # If the comma separates clauses of a sentence, it should be converted to
    # a fullwidth comma (U+FF0C). If the comma separates list items, as in
    # "apple, orange, grape, .....", the ideographic comma "、" is used.
    #
    # Each key is a punctuation mark followed by a space or a newline.  The
    # replacement drops the trailing space but keeps a trailing newline, so
    # that line structure is preserved.
    puncdict = {
        ". ": "。",
        "; ": ";",
        ": ": ":",
        "! ": "!",
        "? ": "?",
        ".\n": "。\n",
        ";\n": ";\n",
        ":\n": ":\n",
        "!\n": "!\n",
        # Keep the newline, consistent with every other "<mark>\n" rule above;
        # dropping it would join the following line onto this one.
        "?\n": "?\n",
        "% ": "%",
    }

    @classmethod
    def length_difference(cls, length):
        """Return ``10 - length / 2`` for the given ``length``.

        :param length: a numeric length.  Presumably the length of the source
            text, as passed in by the base class machinery -- confirm against
            ``common.Common``.
        :return: ``10 - length / 2``.  True division is used, so the result
            is a float for integer input on Python 3.
        """
        return 10 - length / 2

    # Presumably the names of quality checks to skip for this language; both
    # entries concern capitalisation, which Chinese script does not have.
    # The "all" key is assumed to mean "every checker" -- verify in the
    # checks code that consumes this mapping.
    ignoretests = {
        "all": ["simplecaps", "startcaps"],
    }