#
# Copyright 2007, 2010 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

"""This module represents the Persian language.

.. seealso:: http://en.wikipedia.org/wiki/Persian_language
"""


import re

from translate.lang import common


def guillemets(text):
    """Replace paired quotation marks in *text* with guillemets («…»).

    The following pairs are rewritten:

    - ASCII double quotes, only when the text holds an even number of them;
    - a backtick/apostrophe pair, when the text holds as many backticks as
      apostrophes;
    - otherwise apostrophe pairs, when the text holds an even number of them;
    - curly double quotes (“…”), unconditionally.

    A quotation whose opening mark directly follows ``=`` is left untouched,
    since it is probably an XML attribute value such as
    ``<a href="something">``.

    :param text: the string to convert.
    :return: the converted string.
    """

    def convertquotation(match):
        """Build the replacement for one matched quotation."""
        prefix = match.group(1)
        # Let's see that we didn't perhaps match an XML tag property like
        # <a href="something">
        if prefix == "=":
            return match.group(0)
        return f"{prefix}«{match.group(2)}»"

    # The prefix group "(.|^)" has to accept *any* character in front of the
    # opening quote.  Without re.DOTALL, "." rejects a newline, so a quotation
    # starting right after a line break was skipped and the following quotes
    # could then be paired up wrongly.  "." only occurs in the prefix group,
    # so the flag changes nothing else.
    flags = re.DOTALL

    # Check that there is an even number of double quotes, otherwise it is
    # probably not safe to convert them.
    if text.count('"') % 2 == 0:
        text = re.sub('(.|^)"([^"]+)"', convertquotation, text, flags=flags)
    singlecount = text.count("'")
    if singlecount:
        if singlecount == text.count("`"):
            # `quoted' style: every apostrophe has a matching backtick.
            text = re.sub("(.|^)`([^']+)'", convertquotation, text, flags=flags)
        elif singlecount % 2 == 0:
            text = re.sub("(.|^)'([^']+)'", convertquotation, text, flags=flags)
    text = re.sub("(.|^)“([^”]+)”", convertquotation, text, flags=flags)
    return text


class fa(common.Common):
    """This class represents Persian."""

    # Separator placed between list items: Arabic comma followed by a space.
    listseperator = "، "

    # Source punctuation mark -> Persian counterpart.
    puncdict = {
        ",": "،",
        ";": "؛",
        "?": "؟",
        # This causes problems with variables, so commented out for now:
        # "%": "٪",
    }

    # (ASCII digit, local digit) pairs; each ASCII digit is listed twice,
    # once per digit set.
    numbertuple = (
        # It seems that Persian uses both Arabic-Indic and Extended
        # Arabic-Indic digits.
        ("0", "٠"),  # U+0660 Arabic-Indic digit zero.
        ("1", "١"),  # U+0661 Arabic-Indic digit one.
        ("2", "٢"),  # U+0662 Arabic-Indic digit two.
        ("3", "٣"),  # U+0663 Arabic-Indic digit three.
        ("4", "٤"),  # U+0664 Arabic-Indic digit four.
        ("5", "٥"),  # U+0665 Arabic-Indic digit five.
        ("6", "٦"),  # U+0666 Arabic-Indic digit six.
        ("7", "٧"),  # U+0667 Arabic-Indic digit seven.
        ("8", "٨"),  # U+0668 Arabic-Indic digit eight.
        ("9", "٩"),  # U+0669 Arabic-Indic digit nine.
        ("0", "۰"),  # U+06F0 Extended Arabic-Indic digit zero.
        ("1", "۱"),  # U+06F1 Extended Arabic-Indic digit one.
        ("2", "۲"),  # U+06F2 Extended Arabic-Indic digit two.
        ("3", "۳"),  # U+06F3 Extended Arabic-Indic digit three.
        ("4", "۴"),  # U+06F4 Extended Arabic-Indic digit four.
        ("5", "۵"),  # U+06F5 Extended Arabic-Indic digit five.
        ("6", "۶"),  # U+06F6 Extended Arabic-Indic digit six.
        ("7", "۷"),  # U+06F7 Extended Arabic-Indic digit seven.
        ("8", "۸"),  # U+06F8 Extended Arabic-Indic digit eight.
        ("9", "۹"),  # U+06F9 Extended Arabic-Indic digit nine.
    )

    # NOTE(review): presumably names of checks to skip for this language
    # (Persian script has no letter case) -- confirm against the consumer.
    ignoretests = {
        "all": ["simplecaps", "startcaps"],
    }
    # TODO: check persian numerics
    # TODO: zwj and zwnj?

    @classmethod
    def punctranslate(cls, text):
        """Implement "French" quotation marks."""
        # Run the inherited punctuation translation first, then convert the
        # remaining quotation marks to guillemets.
        text = super().punctranslate(text)
        return guillemets(text)