1<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Component\Translation\Extractor;
13
14use Symfony\Component\Finder\Finder;
15use Symfony\Component\Translation\MessageCatalogue;
16
17/**
18 * PhpExtractor extracts translation messages from a PHP template.
19 *
20 * @author Michel Salib <michelsalib@hotmail.com>
21 */
class PhpExtractor extends AbstractFileExtractor implements ExtractorInterface
{
    /**
     * Sequence placeholder: the translation message argument is expected here.
     */
    public const MESSAGE_TOKEN = 300;

    /**
     * Sequence placeholder: an arbitrary argument that is skipped, not read.
     */
    public const METHOD_ARGUMENTS_TOKEN = 1000;

    /**
     * Sequence placeholder: the translation domain argument is expected here.
     */
    public const DOMAIN_TOKEN = 1001;

    /**
     * Prefix for new found message.
     *
     * @var string
     */
    private $prefix = '';

    /**
     * The sequences that capture translation messages.
     *
     * Each sequence is a list of items matched in order against the token
     * stream (whitespace between tokens is ignored). A string item must equal
     * the normalized token text; an integer item is one of the *_TOKEN
     * placeholders above. For every starting token the sequences are tried in
     * declaration order and the first one that yields a message wins, so the
     * longer variant (with a domain) is listed before its shorter counterpart.
     *
     * Fully-qualified class names are listed twice: once split on "\" and once
     * as a single string, so that either tokenization of the name matches.
     *
     * @var array
     */
    protected $sequences = [
        [
            '->',
            'trans',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        [
            '->',
            'trans',
            '(',
            self::MESSAGE_TOKEN,
        ],
        [
            'new',
            'TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        [
            'new',
            'TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
        ],
        [
            'new',
            '\\',
            'Symfony',
            '\\',
            'Component',
            '\\',
            'Translation',
            '\\',
            'TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        [
            'new',
            '\Symfony\Component\Translation\TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        [
            'new',
            '\\',
            'Symfony',
            '\\',
            'Component',
            '\\',
            'Translation',
            '\\',
            'TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
        ],
        [
            'new',
            '\Symfony\Component\Translation\TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
        ],
        [
            't',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        [
            't',
            '(',
            self::MESSAGE_TOKEN,
        ],
    ];

    /**
     * {@inheritdoc}
     */
    public function extract($resource, MessageCatalogue $catalog)
    {
        $files = $this->extractFiles($resource);
        foreach ($files as $file) {
            $this->parseTokens(token_get_all(file_get_contents($file)), $catalog, $file);

            // Token arrays can be large; release engine memory caches between files.
            gc_mem_caches();
        }
    }

    /**
     * {@inheritdoc}
     */
    public function setPrefix(string $prefix)
    {
        $this->prefix = $prefix;
    }

    /**
     * Normalizes a token to its textual content.
     *
     * Array tokens ([id, text, line]) are reduced to their text. Plain string
     * tokens are returned unchanged; the explicit 'b"' check keeps that
     * two-character string token from being indexed like an array.
     *
     * @param mixed $token
     *
     * @return string|null
     */
    protected function normalizeToken($token)
    {
        if (isset($token[1]) && 'b"' !== $token) {
            return $token[1];
        }

        return $token;
    }

    /**
     * Seeks to a non-whitespace token.
     *
     * Leaves the iterator on the first token that is not T_WHITESPACE, or
     * past the end when only whitespace remains.
     */
    private function seekToNextRelevantToken(\Iterator $tokenIterator): void
    {
        for (; $tokenIterator->valid(); $tokenIterator->next()) {
            $t = $tokenIterator->current();
            if (\T_WHITESPACE !== $t[0]) {
                break;
            }
        }
    }

    /**
     * Skips over one call argument without interpreting it.
     *
     * Tracks "(" / "[" nesting and stops on the "," that ends the argument at
     * the current nesting level, or on the ")" that closes the enclosing call.
     * The iterator is left on that delimiter.
     */
    private function skipMethodArgument(\Iterator $tokenIterator): void
    {
        $openBraces = 0;

        for (; $tokenIterator->valid(); $tokenIterator->next()) {
            $t = $tokenIterator->current();

            if ('[' === $t[0] || '(' === $t[0]) {
                ++$openBraces;
            }

            if (']' === $t[0] || ')' === $t[0]) {
                --$openBraces;
            }

            // Depth 0 + "," ends this argument; depth -1 + ")" means the
            // enclosing call was closed before any further argument.
            if ((0 === $openBraces && ',' === $t[0]) || (-1 === $openBraces && ')' === $t[0])) {
                break;
            }
        }
    }

    /**
     * Extracts the message from the iterator while the tokens
     * match allowed message tokens.
     *
     * Quoted strings, "." concatenations and heredoc/nowdoc bodies are
     * accumulated; the first token of any other kind stops the scan and the
     * iterator is left on it.
     *
     * @return string The collected text, or '' when no string token was found
     */
    private function getValue(\Iterator $tokenIterator): string
    {
        $message = '';
        $docToken = '';
        $docPart = '';

        for (; $tokenIterator->valid(); $tokenIterator->next()) {
            $t = $tokenIterator->current();
            if ('.' === $t) {
                // Concatenate with next token
                continue;
            }
            if (!isset($t[1])) {
                // Any other single-character token ends the value.
                break;
            }

            switch ($t[0]) {
                case \T_START_HEREDOC:
                    $docToken = $t[1];
                    break;
                case \T_ENCAPSED_AND_WHITESPACE:
                case \T_CONSTANT_ENCAPSED_STRING:
                    if ('' === $docToken) {
                        $message .= PhpStringTokenParser::parse($t[1]);
                    } else {
                        // Inside a heredoc: hold the raw body until the closing
                        // marker reveals how much indentation to strip.
                        $docPart = $t[1];
                    }
                    break;
                case \T_END_HEREDOC:
                    // The closing marker may be indented with spaces or tabs;
                    // that same amount is removed from every body line.
                    if ($indentation = strspn($t[1], " \t")) {
                        $docPartWithLineBreaks = $docPart;
                        $docPart = '';

                        foreach (preg_split('~(\r\n|\n|\r)~', $docPartWithLineBreaks, -1, \PREG_SPLIT_DELIM_CAPTURE) as $str) {
                            if (\in_array($str, ["\r\n", "\n", "\r"], true)) {
                                // Line breaks are kept exactly as written.
                                $docPart .= $str;
                            } else {
                                $docPart .= substr($str, $indentation);
                            }
                        }
                    }

                    $message .= PhpStringTokenParser::parseDocString($docToken, $docPart);
                    $docToken = '';
                    $docPart = '';
                    break;
                case \T_WHITESPACE:
                    break;
                default:
                    break 2;
            }
        }

        return $message;
    }

    /**
     * Extracts trans message from PHP tokens.
     *
     * Every token position is tried as the start of each configured sequence.
     * The first sequence that produces a non-empty message registers it in the
     * catalogue (with the configured prefix) and records "file:line" of the
     * starting token under the "sources" metadata key.
     */
    protected function parseTokens(array $tokens, MessageCatalogue $catalog, string $filename)
    {
        $tokenIterator = new \ArrayIterator($tokens);

        for ($key = 0; $key < $tokenIterator->count(); ++$key) {
            foreach ($this->sequences as $sequence) {
                $message = '';
                $domain = 'messages';
                $tokenIterator->seek($key);

                foreach ($sequence as $sequenceKey => $item) {
                    $this->seekToNextRelevantToken($tokenIterator);

                    if ($this->normalizeToken($tokenIterator->current()) === $item) {
                        // Literal item matched: consume it and move on.
                        $tokenIterator->next();
                        continue;
                    } elseif (self::MESSAGE_TOKEN === $item) {
                        $message = $this->getValue($tokenIterator);

                        if (\count($sequence) === ($sequenceKey + 1)) {
                            break;
                        }
                    } elseif (self::METHOD_ARGUMENTS_TOKEN === $item) {
                        $this->skipMethodArgument($tokenIterator);
                    } elseif (self::DOMAIN_TOKEN === $item) {
                        $domainToken = $this->getValue($tokenIterator);
                        if ('' !== $domainToken) {
                            $domain = $domainToken;
                        }

                        break;
                    } else {
                        // Literal item did not match: give up on this sequence.
                        break;
                    }
                }

                // Compare against '' explicitly: a plain truthiness check
                // would drop the valid message "0".
                if ('' !== $message) {
                    $catalog->set($message, $this->prefix.$message, $domain);
                    $metadata = $catalog->getMetadata($message, $domain) ?? [];
                    // Collapse any run of "/" or "\" into a single "/".
                    $normalizedFilename = preg_replace('{[\\\\/]+}', '/', $filename);
                    $metadata['sources'][] = $normalizedFilename.':'.$tokens[$key][2];
                    $catalog->setMetadata($message, $metadata, $domain);
                    break;
                }
            }
        }
    }

    /**
     * Tells whether the given path is an existing file with a "php" extension.
     *
     * @return bool
     *
     * @throws \InvalidArgumentException
     */
    protected function canBeExtracted(string $file)
    {
        return $this->isFile($file) && 'php' === pathinfo($file, \PATHINFO_EXTENSION);
    }

    /**
     * {@inheritdoc}
     */
    protected function extractFromDirectory($directory)
    {
        $finder = new Finder();

        return $finder->files()->name('*.php')->in($directory);
    }
}
337