1<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Component\Translation;
13
14use Symfony\Contracts\Translation\TranslatorInterface;
15
/**
 * This translator should only be used in a development environment.
 *
 * It decorates another translator and alters the strings it returns (accented
 * characters, length expansion, surrounding brackets) so that untranslated or
 * layout-sensitive strings are easy to spot.
 */
final class PseudoLocalizationTranslator implements TranslatorInterface
{
    private const EXPANSION_CHARACTER = '~';

    /** @var TranslatorInterface The decorated translator producing the real translations */
    private $translator;

    /** @var bool Whether ASCII characters are replaced by look-alike characters */
    private $accents;

    /** @var float|int Target length factor, validated to be >= 1 */
    private $expansionFactor;

    /** @var bool Whether the result is wrapped in "[" and "]" */
    private $brackets;

    /** @var bool Whether the translated string is parsed as HTML before being altered */
    private $parseHTML;

    /** @var string[] Names of the HTML attributes whose values may be altered */
    private $localizableHTMLAttributes;

    /**
     * Available options:
     *  * accents:
     *      type: boolean
     *      default: true
     *      description: replace ASCII characters of the translated string with accented versions or similar characters
     *      example: if true, "foo" => "ƒöö".
     *
     *  * expansion_factor:
     *      type: float
     *      default: 1
     *      validation: it must be greater than or equal to 1
     *      description: expand the translated string by the given factor with spaces and tildes
     *      example: if 2 (with the "accents" and "brackets" options set to false), "foo" => "foo ~~"
     *
     *  * brackets:
     *      type: boolean
     *      default: true
     *      description: wrap the translated string with brackets
     *      example: if true, "foo" => "[foo]"
     *
     *  * parse_html:
     *      type: boolean
     *      default: false
     *      description: parse the translated string as HTML - looking for HTML tags has a performance impact but allows to preserve them from alterations - it also allows to compute the visible translated string length which is useful to correctly expand it when it contains HTML
     *      warning: unclosed tags are unsupported, they will be fixed (closed) by the parser - eg, "foo <div>bar" => "foo <div>bar</div>"
     *
     *  * localizable_html_attributes:
     *      type: string[]
     *      default: []
     *      description: the list of HTML attributes whose values can be altered - it is only useful when the "parse_html" option is set to true
     *      example: if ["title"], and with the "accents" option set to true, "<a href="#" title="Go to your profile">Profile</a>" => "<a href="#" title="Ĝö ţö ýöûŕ þŕöƒîļé">Þŕöƒîļé</a>" - if "title" was not in the "localizable_html_attributes" list, the title attribute data would be left unchanged.
     *
     * @throws \InvalidArgumentException When the "expansion_factor" option is lower than 1
     */
    public function __construct(TranslatorInterface $translator, array $options = [])
    {
        $this->translator = $translator;
        $this->accents = $options['accents'] ?? true;

        if (1.0 > ($this->expansionFactor = $options['expansion_factor'] ?? 1.0)) {
            throw new \InvalidArgumentException('The expansion factor must be greater than or equal to 1.');
        }

        $this->brackets = $options['brackets'] ?? true;

        $this->parseHTML = $options['parse_html'] ?? false;
        // Parsing HTML is pointless when neither accents nor expansion apply.
        // The comparison is deliberately loose (the factor is already known to
        // be >= 1): a strict "1.0 ===" check would miss an integer 1.
        if ($this->parseHTML && !$this->accents && 1.0 >= $this->expansionFactor) {
            $this->parseHTML = false;
        }

        $this->localizableHTMLAttributes = $options['localizable_html_attributes'] ?? [];
    }

    /**
     * {@inheritdoc}
     *
     * The string returned by the decorated translator is altered according to
     * the options given to the constructor: accents first, then expansion,
     * then brackets.
     */
    public function trans(string $id, array $parameters = [], ?string $domain = null, ?string $locale = null): string
    {
        $trans = '';
        $visibleText = '';

        foreach ($this->getParts($this->translator->trans($id, $parameters, $domain, $locale)) as [$visible, $localizable, $text]) {
            // Only visible parts count toward the length used for the expansion.
            if ($visible) {
                $visibleText .= $text;
            }

            // Non-localizable parts (markup, protected attribute values) are copied verbatim.
            if (!$localizable) {
                $trans .= $text;

                continue;
            }

            $this->addAccents($trans, $text);
        }

        $this->expand($trans, $visibleText);

        $this->addBrackets($trans);

        return $trans;
    }

    /**
     * Splits the translated string into parts.
     *
     * Each part is a [bool $visible, bool $localizable, string $text] triple.
     * Without HTML parsing, the whole string is a single visible and
     * localizable part.
     */
    private function getParts(string $originalTrans): array
    {
        if (!$this->parseHTML) {
            return [[true, true, $originalTrans]];
        }

        // Encode non-ASCII characters as numeric entities so that the HTML parser
        // decodes them correctly whatever the input encoding is. This replaces
        // mb_convert_encoding(..., 'HTML-ENTITIES', ...), deprecated as of PHP 8.2.
        $html = mb_encode_numericentity($originalTrans, [0x80, 0x10FFFF, 0, 0x1FFFFF], mb_detect_encoding($originalTrans, null, true) ?: 'UTF-8');

        // Silence libxml warnings (e.g. the unknown <trans> tag) and restore the previous setting afterwards.
        $useInternalErrors = libxml_use_internal_errors(true);

        $dom = new \DOMDocument();
        $dom->loadHTML('<trans>'.$html.'</trans>');

        libxml_clear_errors();
        libxml_use_internal_errors($useInternalErrors);

        // The parser is expected to wrap the fragment as doctype / <html> / <body> / <trans>:
        // item(1) is <html>, then its first child <body>, then the <trans> wrapper itself.
        return $this->parseNode($dom->childNodes->item(1)->childNodes->item(0)->childNodes->item(0));
    }

    /**
     * Recursively converts the children of a DOM node into a flat list of
     * [bool $visible, bool $localizable, string $text] parts.
     */
    private function parseNode(\DOMNode $node): array
    {
        $parts = [];

        foreach ($node->childNodes as $childNode) {
            // Anything that is not an element is emitted as visible, localizable text.
            if (!$childNode instanceof \DOMElement) {
                $parts[] = [true, true, $childNode->nodeValue];

                continue;
            }

            $parts[] = [false, false, '<'.$childNode->tagName];

            /** @var \DOMAttr $attribute */
            foreach ($childNode->attributes as $attribute) {
                $parts[] = [false, false, ' '.$attribute->nodeName.'="'];

                $localizableAttribute = \in_array($attribute->nodeName, $this->localizableHTMLAttributes, true);
                // Re-escape the attribute value, then split it around the entities produced by the
                // escaping: with PREG_SPLIT_DELIM_CAPTURE, even indexes hold plain text and odd
                // indexes hold the entities, which must never be altered.
                foreach (preg_split('/(&(?:amp|quot|#039|lt|gt);+)/', htmlspecialchars($attribute->nodeValue, \ENT_QUOTES, 'UTF-8'), -1, \PREG_SPLIT_DELIM_CAPTURE) as $i => $match) {
                    if ('' === $match) {
                        continue;
                    }

                    $parts[] = [false, $localizableAttribute && 0 === $i % 2, $match];
                }

                $parts[] = [false, false, '"'];
            }

            $parts[] = [false, false, '>'];

            $parts = array_merge($parts, $this->parseNode($childNode));

            // Every element gets an explicit closing tag.
            $parts[] = [false, false, '</'.$childNode->tagName.'>'];
        }

        return $parts;
    }

    /**
     * Appends $text to $trans, replacing printable ASCII characters with
     * accented or look-alike characters when the "accents" option is enabled.
     */
    private function addAccents(string &$trans, string $text): void
    {
        $trans .= $this->accents ? strtr($text, [
            ' ' => ' ',
            '!' => '¡',
            '"' => '″',
            '#' => '♯',
            '$' => '€',
            '%' => '‰',
            '&' => '⅋',
            '\'' => '´',
            '(' => '{',
            ')' => '}',
            '*' => '⁎',
            '+' => '⁺',
            ',' => '،',
            '-' => '‐',
            '.' => '·',
            '/' => '⁄',
            '0' => '⓪',
            '1' => '①',
            '2' => '②',
            '3' => '③',
            '4' => '④',
            '5' => '⑤',
            '6' => '⑥',
            '7' => '⑦',
            '8' => '⑧',
            '9' => '⑨',
            ':' => '∶',
            ';' => '⁏',
            '<' => '≤',
            '=' => '≂',
            '>' => '≥',
            '?' => '¿',
            '@' => '՞',
            'A' => 'Å',
            'B' => 'Ɓ',
            'C' => 'Ç',
            'D' => 'Ð',
            'E' => 'É',
            'F' => 'Ƒ',
            'G' => 'Ĝ',
            'H' => 'Ĥ',
            'I' => 'Î',
            'J' => 'Ĵ',
            'K' => 'Ķ',
            'L' => 'Ļ',
            'M' => 'Ṁ',
            'N' => 'Ñ',
            'O' => 'Ö',
            'P' => 'Þ',
            'Q' => 'Ǫ',
            'R' => 'Ŕ',
            'S' => 'Š',
            'T' => 'Ţ',
            'U' => 'Û',
            'V' => 'Ṽ',
            'W' => 'Ŵ',
            'X' => 'Ẋ',
            'Y' => 'Ý',
            'Z' => 'Ž',
            '[' => '⁅',
            '\\' => '∖',
            ']' => '⁆',
            '^' => '˄',
            '_' => '‿',
            '`' => '‵',
            'a' => 'å',
            'b' => 'ƀ',
            'c' => 'ç',
            'd' => 'ð',
            'e' => 'é',
            'f' => 'ƒ',
            'g' => 'ĝ',
            'h' => 'ĥ',
            'i' => 'î',
            'j' => 'ĵ',
            'k' => 'ķ',
            'l' => 'ļ',
            'm' => 'ɱ',
            'n' => 'ñ',
            'o' => 'ö',
            'p' => 'þ',
            'q' => 'ǫ',
            'r' => 'ŕ',
            's' => 'š',
            't' => 'ţ',
            'u' => 'û',
            'v' => 'ṽ',
            'w' => 'ŵ',
            'x' => 'ẋ',
            'y' => 'ý',
            'z' => 'ž',
            '{' => '(',
            '|' => '¦',
            '}' => ')',
            '~' => '˞',
        ]) : $text;
    }

    /**
     * Appends filler "words" made of the expansion character to $trans until
     * the visible length reaches ceil(visible length * expansion factor).
     *
     * When brackets are enabled, their two characters count toward the target.
     * Filler word lengths are picked at random among the lengths of the words
     * of the visible text, weighted by how often each length occurs.
     */
    private function expand(string &$trans, string $visibleText): void
    {
        if (1.0 >= $this->expansionFactor) {
            return;
        }

        $visibleLength = $this->strlen($visibleText);
        $missingLength = (int) (ceil($visibleLength * $this->expansionFactor)) - $visibleLength;
        if ($this->brackets) {
            $missingLength -= 2;
        }

        if (0 >= $missingLength) {
            return;
        }

        // Map of word length => number of visible words having that length,
        // restricted to lengths that still fit (with their leading space) in the missing length.
        $words = [];
        $wordsCount = 0;
        foreach (preg_split('/ +/', $visibleText, -1, \PREG_SPLIT_NO_EMPTY) as $word) {
            $wordLength = $this->strlen($word);

            if ($wordLength >= $missingLength) {
                continue;
            }

            if (!isset($words[$wordLength])) {
                $words[$wordLength] = 0;
            }

            ++$words[$wordLength];
            ++$wordsCount;
        }

        // No usable word length: fill the whole gap at once.
        if (!$words) {
            $trans .= 1 === $missingLength ? self::EXPANSION_CHARACTER : ' '.str_repeat(self::EXPANSION_CHARACTER, $missingLength - 1);

            return;
        }

        // Most frequent lengths first.
        arsort($words, \SORT_NUMERIC);

        $longestWordLength = max(array_keys($words));

        while (true) {
            // Weighted random pick: walk the counts until the random rank is consumed.
            $r = mt_rand(1, $wordsCount);

            foreach ($words as $length => $count) {
                $r -= $count;
                if ($r <= 0) {
                    break;
                }
            }

            $trans .= ' '.str_repeat(self::EXPANSION_CHARACTER, $length);

            // The filler word plus its leading space.
            $missingLength -= $length + 1;

            if (0 === $missingLength) {
                return;
            }

            // Drop the lengths that no longer fit in what is left to fill.
            while ($longestWordLength >= $missingLength) {
                $wordsCount -= $words[$longestWordLength];
                unset($words[$longestWordLength]);

                if (!$words) {
                    $trans .= 1 === $missingLength ? self::EXPANSION_CHARACTER : ' '.str_repeat(self::EXPANSION_CHARACTER, $missingLength - 1);

                    return;
                }

                $longestWordLength = max(array_keys($words));
            }
        }
    }

    /**
     * Wraps $trans in square brackets when the "brackets" option is enabled.
     */
    private function addBrackets(string &$trans): void
    {
        if (!$this->brackets) {
            return;
        }

        $trans = '['.$trans.']';
    }

    /**
     * Returns the length of $s in characters, falling back to its length in
     * bytes when the encoding cannot be detected.
     */
    private function strlen(string $s): int
    {
        return false === ($encoding = mb_detect_encoding($s, null, true)) ? \strlen($s) : mb_strlen($s, $encoding);
    }
}
360