<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Component\Translation\Extractor;

use Symfony\Component\Finder\Finder;
use Symfony\Component\Translation\MessageCatalogue;

/**
 * PhpExtractor extracts translation messages from a PHP template.
 *
 * The file is tokenized with token_get_all() and every token position is
 * compared against the patterns listed in $sequences. When a pattern yields a
 * non-empty message, the message is stored in the catalogue together with the
 * file name and line number it was found at.
 *
 * @author Michel Salib <michelsalib@hotmail.com>
 */
class PhpExtractor extends AbstractFileExtractor implements ExtractorInterface
{
    /**
     * Sequence placeholder: the message literal is read at this position.
     */
    public const MESSAGE_TOKEN = 300;

    /**
     * Sequence placeholder: one call argument is skipped at this position.
     */
    public const METHOD_ARGUMENTS_TOKEN = 1000;

    /**
     * Sequence placeholder: the domain literal is read at this position.
     */
    public const DOMAIN_TOKEN = 1001;

    /**
     * Prefix for new found message.
     *
     * @var string
     */
    private $prefix = '';

    /**
     * The sequence that captures translation messages.
     *
     * Each entry is a list of normalized token texts mixed with the *_TOKEN
     * placeholders above. Entries are tried in order and the first one that
     * produces a non-empty message wins, so the longer variant (with domain)
     * is listed before its shorter counterpart.
     *
     * The fully-qualified TranslatableMessage name appears both split into
     * separate parts and as one single string; presumably this covers the
     * different ways the tokenizer reports qualified names across PHP versions.
     *
     * @var array
     */
    protected $sequences = [
        [
            '->',
            'trans',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        [
            '->',
            'trans',
            '(',
            self::MESSAGE_TOKEN,
        ],
        [
            'new',
            'TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        [
            'new',
            'TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
        ],
        [
            'new',
            '\\',
            'Symfony',
            '\\',
            'Component',
            '\\',
            'Translation',
            '\\',
            'TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        [
            'new',
            '\Symfony\Component\Translation\TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        [
            'new',
            '\\',
            'Symfony',
            '\\',
            'Component',
            '\\',
            'Translation',
            '\\',
            'TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
        ],
        [
            'new',
            '\Symfony\Component\Translation\TranslatableMessage',
            '(',
            self::MESSAGE_TOKEN,
        ],
        [
            't',
            '(',
            self::MESSAGE_TOKEN,
            ',',
            self::METHOD_ARGUMENTS_TOKEN,
            ',',
            self::DOMAIN_TOKEN,
        ],
        [
            't',
            '(',
            self::MESSAGE_TOKEN,
        ],
    ];

    /**
     * {@inheritdoc}
     *
     * Tokenizes every file resolved from $resource and records the messages
     * found in it into $catalog.
     */
    public function extract($resource, MessageCatalogue $catalog)
    {
        $files = $this->extractFiles($resource);
        foreach ($files as $file) {
            $this->parseTokens(token_get_all(file_get_contents($file)), $catalog, $file);

            // Called after each file; token arrays can be large, so this
            // presumably keeps memory usage flat across many files.
            gc_mem_caches();
        }
    }

    /**
     * {@inheritdoc}
     *
     * The prefix is prepended to the message id to build the translation
     * stored for every newly found message.
     */
    public function setPrefix(string $prefix)
    {
        $this->prefix = $prefix;
    }

    /**
     * Normalizes a token.
     *
     * Array tokens are reduced to their text (index 1). Plain string tokens
     * are returned unchanged.
     *
     * @param mixed $token
     *
     * @return string|null
     */
    protected function normalizeToken($token)
    {
        // The string token 'b"' also has an offset 1 (the quote character), so
        // it is excluded explicitly to be returned as a whole.
        if (isset($token[1]) && 'b"' !== $token) {
            return $token[1];
        }

        return $token;
    }

    /**
     * Seeks to a non-whitespace token.
     *
     * The iterator is left on the first token that is not T_WHITESPACE, or
     * past the end when only whitespace remains.
     */
    private function seekToNextRelevantToken(\Iterator $tokenIterator)
    {
        for (; $tokenIterator->valid(); $tokenIterator->next()) {
            $t = $tokenIterator->current();
            if (\T_WHITESPACE !== $t[0]) {
                break;
            }
        }
    }

    /**
     * Skips one call argument.
     *
     * Advances the iterator until it sits on the ',' that ends the current
     * argument or on the ')' that closes the enclosing call. Nested
     * parentheses and square brackets are tracked so that separators inside
     * them are ignored.
     */
    private function skipMethodArgument(\Iterator $tokenIterator)
    {
        $openBraces = 0;

        for (; $tokenIterator->valid(); $tokenIterator->next()) {
            $t = $tokenIterator->current();

            if ('[' === $t[0] || '(' === $t[0]) {
                ++$openBraces;
            }

            if (']' === $t[0] || ')' === $t[0]) {
                --$openBraces;
            }

            // Depth 0 and ',': end of this argument. Depth -1 and ')': the
            // call itself was closed without any further argument.
            if ((0 === $openBraces && ',' === $t[0]) || (-1 === $openBraces && ')' === $t[0])) {
                break;
            }
        }
    }

    /**
     * Extracts the message from the iterator while the tokens
     * match allowed message tokens.
     *
     * Quoted strings, heredocs and nowdocs joined with '.' are concatenated.
     * Reading stops at the first token that is neither a string part,
     * whitespace nor the concatenation operator; the iterator is left on it.
     *
     * @return string The concatenated literal, or '' when none was found
     */
    private function getValue(\Iterator $tokenIterator)
    {
        $message = '';
        $docToken = '';
        $docPart = '';

        for (; $tokenIterator->valid(); $tokenIterator->next()) {
            $t = $tokenIterator->current();
            if ('.' === $t) {
                // Concatenate with next token
                continue;
            }
            if (!isset($t[1])) {
                // Any other single-character token ends the literal.
                break;
            }

            switch ($t[0]) {
                case \T_START_HEREDOC:
                    $docToken = $t[1];
                    break;
                case \T_ENCAPSED_AND_WHITESPACE:
                case \T_CONSTANT_ENCAPSED_STRING:
                    if ('' === $docToken) {
                        $message .= PhpStringTokenParser::parse($t[1]);
                    } else {
                        // Inside a heredoc/nowdoc: keep the raw body until the
                        // closing marker tells how much indentation to strip.
                        $docPart = $t[1];
                    }
                    break;
                case \T_END_HEREDOC:
                    // The closing marker may be indented with spaces or with
                    // tabs; that many leading characters are removed from
                    // every line of the body.
                    if ($indentation = strspn($t[1], " \t")) {
                        $docPartWithLineBreaks = $docPart;
                        $docPart = '';

                        foreach (preg_split('~(\r\n|\n|\r)~', $docPartWithLineBreaks, -1, \PREG_SPLIT_DELIM_CAPTURE) as $str) {
                            if (\in_array($str, ["\r\n", "\n", "\r"], true)) {
                                // Line breaks are captured as their own chunks
                                // and copied verbatim.
                                $docPart .= $str;
                            } else {
                                $docPart .= substr($str, $indentation);
                            }
                        }
                    }

                    $message .= PhpStringTokenParser::parseDocString($docToken, $docPart);
                    $docToken = '';
                    $docPart = '';
                    break;
                case \T_WHITESPACE:
                    break;
                default:
                    // Not part of a string literal: stop reading.
                    break 2;
            }
        }

        return $message;
    }

    /**
     * Extracts trans message from PHP tokens.
     *
     * Every token offset is tried as the start of each sequence in turn. The
     * first sequence yielding a non-empty message is recorded in the catalogue
     * under the detected domain ('messages' by default), and "file:line" is
     * appended to the message's 'sources' metadata.
     */
    protected function parseTokens(array $tokens, MessageCatalogue $catalog, string $filename)
    {
        $tokenIterator = new \ArrayIterator($tokens);

        for ($key = 0; $key < $tokenIterator->count(); ++$key) {
            foreach ($this->sequences as $sequence) {
                $message = '';
                $domain = 'messages';
                $tokenIterator->seek($key);

                foreach ($sequence as $sequenceKey => $item) {
                    $this->seekToNextRelevantToken($tokenIterator);

                    if ($this->normalizeToken($tokenIterator->current()) === $item) {
                        $tokenIterator->next();
                        continue;
                    } elseif (self::MESSAGE_TOKEN === $item) {
                        $message = $this->getValue($tokenIterator);

                        if (\count($sequence) === ($sequenceKey + 1)) {
                            break;
                        }
                    } elseif (self::METHOD_ARGUMENTS_TOKEN === $item) {
                        $this->skipMethodArgument($tokenIterator);
                    } elseif (self::DOMAIN_TOKEN === $item) {
                        $domainToken = $this->getValue($tokenIterator);
                        if ('' !== $domainToken) {
                            $domain = $domainToken;
                        }

                        break;
                    } else {
                        // Literal token mismatch: this sequence does not apply.
                        break;
                    }
                }

                // Compare against '' explicitly: a plain truthiness check would
                // silently drop the valid message '0'.
                if ('' !== $message) {
                    $catalog->set($message, $this->prefix.$message, $domain);
                    $metadata = $catalog->getMetadata($message, $domain) ?? [];
                    // Collapse any run of slashes/backslashes into a single '/'.
                    $normalizedFilename = preg_replace('{[\\\\/]+}', '/', $filename);
                    // Index 2 of the sequence's first token is its line number.
                    $metadata['sources'][] = $normalizedFilename.':'.$tokens[$key][2];
                    $catalog->setMetadata($message, $metadata, $domain);
                    break;
                }
            }
        }
    }

    /**
     * Tells whether the given path is a file with the "php" extension.
     *
     * @return bool
     *
     * @throws \InvalidArgumentException
     */
    protected function canBeExtracted(string $file)
    {
        return $this->isFile($file) && 'php' === pathinfo($file, \PATHINFO_EXTENSION);
    }

    /**
     * {@inheritdoc}
     *
     * Returns a Finder over the "*.php" files found in the directory.
     */
    protected function extractFromDirectory($directory)
    {
        $finder = new Finder();

        return $finder->files()->name('*.php')->in($directory);
    }
}