1#
2# Copyright 2004-2008,2010 Zuza Software Foundation
3#
4# This file is part of translate.
5#
6# translate is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 2 of the License, or
9# (at your option) any later version.
10#
11# translate is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program; if not, see <http://www.gnu.org/licenses/>.
18
19"""Filters that strings can be passed through before certain tests.
20"""
21
22import re
23
24from translate.filters import decoration
25from translate.misc import quote
26
27
def removekdecomments(str1):
    r"""Remove leading KDE-style PO comments from *str1*.

    A KDE comment begins on a line starting with ``_:`` and runs up to and
    including the first line whose stripped text ends with a literal ``\n``
    (a backslash followed by ``n``, not a newline character).
    Example::

      "_: comment\n"

    Only the head of the string is examined: scanning stops at the first
    non-blank line that is not part of a comment, and everything from that
    line onwards is returned untouched. Blank lines outside a comment are
    kept. A comment that is never terminated swallows the rest of the text.

    :param str str1: Text to strip
    :rtype: String
    :return: *str1* with the comment lines removed
    :raises AssertionError: if *str1* is not a ``str``
    """
    assert isinstance(str1, str)
    lines = str1.split("\n")
    kept = []
    in_comment = False
    for index, line in enumerate(lines):
        if line.startswith("_:"):
            in_comment = True
        if not in_comment:
            if line.strip():
                # First real content line: nothing after it is a KDE comment,
                # so keep the remainder verbatim and stop scanning.
                kept.extend(lines[index:])
                break
            kept.append(line)
            continue
        # Inside a comment the line is dropped; a trailing literal "\n"
        # marks the last line of that comment.
        if line.strip().endswith("\\n"):
            in_comment = False
    return "\n".join(kept)
55
56
def filteraccelerators(accelmarker):
    """Build a filter that strips the *accelmarker* from accelerators in a
    string.

    :param string accelmarker: Accelerator marker, or ``None`` (treated as a
        marker of length zero)
    :rtype: Function
    :return: fn(str1, acceptlist=None)
    """
    accelmarkerlen = 0 if accelmarker is None else len(accelmarker)

    def filtermarkedaccelerators(str1, acceptlist=None):
        """Return *str1* with every accelerator found by
        ``decoration.findaccelerators`` written back without its marker.

        *acceptlist* is handed to ``decoration.findaccelerators`` unchanged.
        """
        # Only the first element of the result is used; the second is ignored.
        found, _rejected = decoration.findaccelerators(str1, accelmarker, acceptlist)
        pieces = []
        cursor = 0
        for accelstart, accelerator in found:
            # Text before the accelerator, then the accelerator on its own.
            pieces.append(str1[cursor:accelstart])
            pieces.append(accelerator)
            # Resume after the marker and the accelerator in the original text.
            cursor = accelstart + accelmarkerlen + len(accelerator)
        pieces.append(str1[cursor:])
        return "".join(pieces)

    return filtermarkedaccelerators
84
85
def varname(variable, startmarker, endmarker):
    """Variable filter that keeps the variable name.

    *variable* is returned exactly as given; *startmarker* and *endmarker*
    are accepted only so the signature matches the other variable filters
    and are never inspected.

    :param string variable: Variable text
    :param startmarker: Ignored
    :param endmarker: Ignored
    :rtype: String
    :return: *variable*, unchanged
    """
    # NOTE(review): presumably callers pass the name with its marker
    # punctuation already removed, so no slicing is needed - confirm against
    # the code that locates variables.
    return variable
107
108
def varnone(variable, startmarker, endmarker):
    """Variable filter that discards the variable.

    All three arguments are ignored.

    :rtype: String
    :return: Always the empty string
    """
    return str()
116
117
def filtervariables(startmarker, endmarker, varfilter):
    """Returns a function that filters variables marked using *startmarker* and
    *endmarker* from a string.

    :param string startmarker: Start of variable marker, or ``None``
    :param endmarker: End of variable marker: a string, ``None``, or an
        integer. ``None`` and integers contribute no characters when the
        variable's extent is computed.
    :param Function varfilter: fn(variable, startmarker, endmarker) returning
        the text that replaces each variable
    :rtype: Function
    :return: fn(str1)
    """
    startmarkerlen = 0 if startmarker is None else len(startmarker)
    # An integer end marker is not delimiter text, so it takes up no room in
    # the string. NOTE(review): presumably it is a fixed variable length
    # interpreted by decoration.findmarkedvariables - confirm there.
    if endmarker is None or isinstance(endmarker, int):
        endmarkerlen = 0
    else:
        endmarkerlen = len(endmarker)

    def filtermarkedvariables(str1):
        """Return *str1* with each variable reported by
        ``decoration.findmarkedvariables`` (markers included) replaced by the
        result of *varfilter*.
        """
        varlocs = decoration.findmarkedvariables(str1, startmarker, endmarker)
        pieces = []
        pos = 0
        for varstart, variable in varlocs:
            pieces.append(str1[pos:varstart])
            pieces.append(varfilter(variable, startmarker, endmarker))
            # Skip the start marker, the variable itself and the end marker.
            pos = varstart + startmarkerlen + len(variable) + endmarkerlen
        pieces.append(str1[pos:])
        return "".join(pieces)

    return filtermarkedvariables
153
154
# Special words whose apostrophe sits at the edge of the word (apostrophes in
# the middle of a word are matched by word_with_apos_re below), each mapped to
# the same word with every non-alphanumeric character removed.
wordswithpunctuation = {
    word: "".join(char for char in word if char.isalnum())
    for word in ("'n", "'t")  # Afrikaans
}

# Word characters, a single apostrophe, then more word characters.
word_with_apos_re = re.compile(r"(?u)\w+'\w+")
164
165
def filterwordswithpunctuation(str1):
    """Remove the punctuation from known punctuated words and from words
    with an apostrophe in the middle.

    Two kinds of occurrence are collected: every entry of
    ``wordswithpunctuation`` at the positions reported by ``quote.find_all``,
    and every match of ``word_with_apos_re``. Each is replaced by its
    alphanumeric-only form. Occurrences are applied from left to right, and
    one that starts inside a span that has already been replaced is skipped,
    so ``don't`` is rewritten once rather than once as a whole word and again
    for the ``'t`` inside it.

    :param str str1: Text to filter
    :rtype: String
    :return: The filtered text; *str1* itself when it contains no apostrophe
    """
    if "'" not in str1:
        return str1
    occurrences = []
    for word, replacement in wordswithpunctuation.items():
        occurrences.extend(
            (pos, word, replacement) for pos in quote.find_all(str1, word)
        )
    for match in word_with_apos_re.finditer(str1):
        word = match.group()
        replacement = "".join(filter(str.isalnum, word))
        occurrences.append((match.start(), word, replacement))
    occurrences.sort()
    pieces = []
    lastpos = 0
    for pos, word, replacement in occurrences:
        if pos < lastpos:
            # Overlaps the previous replacement; applying it as well would
            # emit part of the text twice.
            continue
        pieces.append(str1[lastpos:pos])
        pieces.append(replacement)
        lastpos = pos + len(word)
    pieces.append(str1[lastpos:])
    return "".join(pieces)
193