<?php declare(strict_types=1);

namespace PhpParser\Lexer\TokenEmulator;

use PhpParser\Lexer\Emulative;

/**
 * Token emulator for numeric literals that contain underscore separators
 * (for example 1_000, 0xFF_FF or 1_0.5e1_0).
 *
 * When the input token stream has such a literal split across several tokens,
 * emulate() re-reads the literal from the source code and replaces the
 * fragments with a single T_LNUMBER / T_DNUMBER token.
 */
final class NumericLiteralSeparatorEmulator extends TokenEmulator
{
    // Regex fragments describing the supported literal forms. Each allows
    // underscores only between two digits of the respective base.
    const BIN = '(?:0b[01]+(?:_[01]+)*)';
    const HEX = '(?:0x[0-9a-f]+(?:_[0-9a-f]+)*)';
    const DEC = '(?:[0-9]+(?:_[0-9]+)*)';
    // "1.5", "1." or ".5"
    const SIMPLE_FLOAT = '(?:' . self::DEC . '\.' . self::DEC . '?|\.' . self::DEC . ')';
    // Exponent suffix such as "e10" or "e-1_0"
    const EXP = '(?:e[+-]?' . self::DEC . ')';
    const FLOAT = '(?:' . self::SIMPLE_FLOAT . self::EXP . '?|' . self::DEC . self::EXP . ')';
    // Complete pattern: case-insensitive (i) and anchored at the match offset (A).
    // FLOAT is listed first so that a float is not cut short by the DEC alternative.
    const NUMBER = '~' . self::FLOAT . '|' . self::BIN . '|' . self::HEX . '|' . self::DEC . '~iA';

    /**
     * @return string The Emulative::PHP_7_4 version constant
     */
    public function getPhpVersion(): string
    {
        return Emulative::PHP_7_4;
    }

    /**
     * Cheap pre-check: does the code contain anything that looks like a
     * separator between two digits?
     *
     * The second pattern covers hexadecimal literals whose digits around the
     * underscore are letters (e.g. 0xa_b), which the first pattern misses.
     *
     * @param string $code Source code to inspect
     *
     * @return bool True if at least one of the two patterns matches
     */
    public function isEmulationNeeded(string $code) : bool
    {
        return preg_match('~[0-9]_[0-9]~', $code)
            || preg_match('~0x[0-9a-f]+_[0-9a-f]~i', $code);
    }

    /**
     * Merges the fragments of separator-containing numeric literals into
     * single number tokens.
     *
     * @param string $code   Source code the tokens were produced from
     * @param array  $tokens Tokens, each either a plain string or an array of
     *                       the form [kind, text, line]
     *
     * @return array The token list with split literals merged
     */
    public function emulate(string $code, array $tokens): array
    {
        // We need to manually iterate and manage a count because we'll change
        // the tokens array on the way
        $codeOffset = 0;
        for ($i = 0, $c = count($tokens); $i < $c; ++$i) {
            $token = $tokens[$i];
            $tokenLen = \strlen(\is_array($token) ? $token[1] : $token);

            if ($token[0] !== T_LNUMBER && $token[0] !== T_DNUMBER) {
                $codeOffset += $tokenLen;
                continue;
            }

            // Re-read the literal from the source, starting where this token begins.
            $res = preg_match(self::NUMBER, $code, $matches, 0, $codeOffset);
            assert($res, "No number at number token position");

            $match = $matches[0];
            $matchLen = \strlen($match);
            if ($matchLen === $tokenLen) {
                // Original token already holds the full number.
                $codeOffset += $tokenLen;
                continue;
            }

            $tokenKind = $this->resolveIntegerOrFloatToken($match);
            $newTokens = [[$tokenKind, $match, $token[2]]];

            // Consume following tokens until the whole matched literal is covered.
            $numTokens = 1;
            $len = $tokenLen;
            while ($matchLen > $len) {
                $nextToken = $tokens[$i + $numTokens];
                $nextTokenText = \is_array($nextToken) ? $nextToken[1] : $nextToken;
                $nextTokenLen = \strlen($nextTokenText);

                $numTokens++;
                if ($matchLen < $len + $nextTokenLen) {
                    // Split trailing characters into a partial token.
                    assert(is_array($nextToken), "Partial token should be an array token");
                    $partialText = substr($nextTokenText, $matchLen - $len);
                    $newTokens[] = [$nextToken[0], $partialText, $nextToken[2]];
                    break;
                }

                $len += $nextTokenLen;
            }

            array_splice($tokens, $i, $numTokens, $newTokens);
            // Shrink the loop bound by the net number of tokens removed.
            $c -= $numTokens - \count($newTokens);
            $codeOffset += $matchLen;
        }

        return $tokens;
    }

    /**
     * Decides whether a numeric literal is an integer or a float token.
     *
     * The separators are stripped and the literal is converted according to
     * its prefix; a float result (including one caused by exceeding the
     * integer range) yields T_DNUMBER.
     *
     * @param string $str Literal text, possibly containing underscores
     *
     * @return int T_DNUMBER or T_LNUMBER
     */
    private function resolveIntegerOrFloatToken(string $str): int
    {
        $str = str_replace('_', '', $str);

        if (stripos($str, '0b') === 0) {
            $num = bindec($str);
        } elseif (stripos($str, '0x') === 0) {
            $num = hexdec($str);
        } elseif (stripos($str, '0') === 0 && ctype_digit($str)) {
            // Leading zero followed by digits only: octal notation.
            $num = octdec($str);
        } else {
            // Unary plus converts the numeric string to int or float.
            $num = +$str;
        }

        return is_float($num) ? T_DNUMBER : T_LNUMBER;
    }

    /**
     * @param string $code   Source code the tokens were produced from
     * @param array  $tokens Tokens to process
     *
     * @return array The tokens, unchanged
     */
    public function reverseEmulate(string $code, array $tokens): array
    {
        // Numeric separators were not legal code previously, don't bother.
        return $tokens;
    }
}