#
# Copyright 2004-2008,2010 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

"""Filters that strings can be passed through before certain tests."""

import re

from translate.filters import decoration
from translate.misc import quote


def removekdecomments(str1):
    r"""Remove KDE-style PO comments.

    KDE comments start with ``_:[space]`` and end with a literal ``\n``.
    Example::

        "_: comment\n"

    Scanning stops at the first non-blank line that is not part of a KDE
    comment; everything from that line onwards is kept untouched.

    :param str str1: Text to strip the KDE comments from
    :rtype: String
    :return: *str1* without the lines that belong to a KDE comment
    """
    # Kept as an assertion (rather than raising TypeError) so that the
    # exception type seen by existing callers does not change.
    assert isinstance(str1, str)
    lines = str1.split("\n")
    removed = set()
    in_comment = False
    for linenum, line in enumerate(lines):
        if line.startswith("_:"):
            in_comment = True
        if in_comment:
            removed.add(linenum)
        elif line.strip():
            # First real content line outside of a comment: nothing further
            # down is considered a KDE comment.
            break
        # A comment ends on the line that finishes with a literal
        # backslash-n sequence (two characters, not a newline).
        if in_comment and line.strip().endswith("\\n"):
            in_comment = False
    return "\n".join(
        line for linenum, line in enumerate(lines) if linenum not in removed
    )


def filteraccelerators(accelmarker):
    """Returns a function that filters accelerators marked using *accelmarker*
    from a string.

    :param string accelmarker: Accelerator marker character
    :rtype: Function
    :return: fn(str1, acceptlist=None)
    """
    accelmarkerlen = 0 if accelmarker is None else len(accelmarker)

    def filtermarkedaccelerators(str1, acceptlist=None):
        """Modifies the accelerators in *str1* marked with the given
        *accelmarker*, using a given *acceptlist* filter.

        Each accelerator found is kept, but the marker in front of it is
        dropped.
        """
        # Only the accepted accelerator locations are used here; the second
        # value returned by findaccelerators is ignored.
        acclocs, _badlocs = decoration.findaccelerators(str1, accelmarker, acceptlist)
        pieces = []
        pos = 0
        for accelstart, accelerator in acclocs:
            pieces.append(str1[pos:accelstart])
            pieces.append(accelerator)
            # Resume copying after the marker and the accelerator itself.
            pos = accelstart + accelmarkerlen + len(accelerator)
        pieces.append(str1[pos:])
        return "".join(pieces)

    return filtermarkedaccelerators


def varname(variable, startmarker, endmarker):
    r"""Variable filter that returns the variable name without the marking
    punctuation.

    .. note:: Currently this function simply returns *variable* unchanged, no
       matter what the markers are set to: *variable* is taken to be passed
       in without its markers, so there is nothing to strip.

    :param string variable: Variable name
    :param startmarker: Start of variable marker (unused)
    :param endmarker: End of variable marker (unused)
    :rtype: String
    :return: *variable*, unchanged
    """
    return variable


def varnone(variable, startmarker, endmarker):
    """Variable filter that returns an empty string.

    All arguments are ignored; the signature only exists to match what
    :func:`filtervariables` expects from a *varfilter*.

    :rtype: String
    :return: Empty string
    """
    return ""


def filtervariables(startmarker, endmarker, varfilter):
    """Returns a function that filters variables marked using *startmarker* and
    *endmarker* from a string.

    :param string startmarker: Start of variable marker
    :param endmarker: End of variable marker; ``None`` or an integer
        contributes no marker length (how such a value is interpreted is up
        to ``decoration.findmarkedvariables``)
    :param Function varfilter: fn(variable, startmarker, endmarker)
    :rtype: Function
    :return: fn(str1)
    """
    startmarkerlen = 0 if startmarker is None else len(startmarker)
    if endmarker is None or isinstance(endmarker, int):
        endmarkerlen = 0
    else:
        endmarkerlen = len(endmarker)

    def filtermarkedvariables(str1):
        r"""Modifies the variables in *str1* marked with a given *\*marker*,
        using a given filter.
        """
        varlocs = decoration.findmarkedvariables(str1, startmarker, endmarker)
        pieces = []
        pos = 0
        for varstart, variable in varlocs:
            pieces.append(str1[pos:varstart])
            pieces.append(varfilter(variable, startmarker, endmarker))
            # Skip the whole marked variable: start marker, name, end marker.
            pos = varstart + startmarkerlen + len(variable) + endmarkerlen
        pieces.append(str1[pos:])
        return "".join(pieces)

    return filtermarkedvariables


# a mapping of special words with punctuation to their non-punctified
# equivalent
# all apostrophes in the middle of the word are handled already
wordswithpunctuation = {
    word: "".join(filter(str.isalnum, word))
    for word in ("'n", "'t")  # Afrikaans
}

word_with_apos_re = re.compile(r"(?u)\w+'\w+")


def filterwordswithpunctuation(str1):
    """Goes through a list of known words that have punctuation and removes the
    punctuation from them.

    Words with an apostrophe in the middle (``it's``) are handled as well.

    :param string str1: Text to filter
    :rtype: String
    :return: *str1* with the punctuation removed from the affected words
    """
    if "'" not in str1:
        return str1
    occurrences = []
    for word, replacement in wordswithpunctuation.items():
        occurrences.extend(
            (pos, word, replacement) for pos in quote.find_all(str1, word)
        )
    for match in word_with_apos_re.finditer(str1):
        word = match.group()
        replacement = "".join(filter(str.isalnum, word))
        occurrences.append((match.start(), word, replacement))
    if not occurrences:
        return str1
    occurrences.sort()
    pieces = []
    lastpos = 0
    for pos, word, replacement in occurrences:
        if pos < lastpos:
            # This occurrence lies inside a span that was already replaced
            # (e.g. the known word "'t" inside "don't"); emitting it again
            # would duplicate characters ("dontt").
            continue
        pieces.append(str1[lastpos:pos])
        pieces.append(replacement)
        lastpos = pos + len(word)
    pieces.append(str1[lastpos:])
    return "".join(pieces)