1<?php declare(strict_types=1);
2
3namespace PhpParser\Lexer\TokenEmulator;
4
5use PhpParser\Lexer\Emulative;
6
/**
 * Token emulator that merges token sequences produced for numeric literals
 * containing "_" separators (e.g. 1_000, 0xff_ff, 1_0.5e1_0) back into a
 * single T_LNUMBER / T_DNUMBER token.
 */
final class NumericLiteralSeparatorEmulator extends TokenEmulator
{
    /** Regex fragment: binary literal, digit groups optionally joined by "_". */
    const BIN = '(?:0b[01]+(?:_[01]+)*)';
    /** Regex fragment: hexadecimal literal, digit groups optionally joined by "_". */
    const HEX = '(?:0x[0-9a-f]+(?:_[0-9a-f]+)*)';
    /** Regex fragment: run of decimal digits, groups optionally joined by "_". */
    const DEC = '(?:[0-9]+(?:_[0-9]+)*)';
    /** Regex fragment: "DEC.", "DEC.DEC" or ".DEC" (no exponent). */
    const SIMPLE_FLOAT = '(?:' . self::DEC . '\.' . self::DEC . '?|\.' . self::DEC . ')';
    /** Regex fragment: exponent part, "e" with optional sign followed by DEC. */
    const EXP = '(?:e[+-]?' . self::DEC . ')';
    /** Regex fragment: SIMPLE_FLOAT with optional exponent, or DEC with mandatory exponent. */
    const FLOAT = '(?:' . self::SIMPLE_FLOAT . self::EXP . '?|' . self::DEC . self::EXP . ')';
    /**
     * Complete pattern for any numeric literal. Case-insensitive (i) and
     * anchored at the match offset (A). FLOAT is tried first so that a float
     * is not cut short by the plain DEC alternative.
     */
    const NUMBER = '~' . self::FLOAT . '|' . self::BIN . '|' . self::HEX . '|' . self::DEC . '~iA';

    /**
     * @return string The Emulative::PHP_7_4 version constant
     */
    public function getPhpVersion(): string
    {
        return Emulative::PHP_7_4;
    }

    /**
     * Cheap pre-check: does the code contain anything that looks like a
     * separator between two decimal digits, or inside a hex literal?
     *
     * @param string $code Source code to inspect
     *
     * @return bool True if either pattern is found anywhere in $code
     */
    public function isEmulationNeeded(string $code) : bool
    {
        if (preg_match('~[0-9]_[0-9]~', $code)) {
            return true;
        }

        // Covers hex digits a-f around the separator, which the decimal check misses.
        return (bool) preg_match('~0x[0-9a-f]+_[0-9a-f]~i', $code);
    }

    /**
     * Rewrites the token list so that every numeric literal with separators
     * is represented by one number token.
     *
     * Tokens are either plain strings or arrays read as [kind, text, third
     * element]; the third element of the replaced tokens is carried over
     * unchanged (presumably the line number, as in token_get_all() output).
     *
     * @param string $code   Source code the tokens were produced from
     * @param array  $tokens Token list covering $code
     *
     * @return array Token list with separated literals merged
     */
    public function emulate(string $code, array $tokens): array
    {
        // The token array is spliced while walking it, so both the position
        // and the upper bound are maintained by hand.
        $offset = 0;
        $total = count($tokens);
        for ($pos = 0; $pos < $total; ++$pos) {
            $current = $tokens[$pos];
            $currentLen = \strlen(\is_array($current) ? $current[1] : $current);

            $isNumber = $current[0] === T_LNUMBER || $current[0] === T_DNUMBER;
            if (!$isNumber) {
                $offset += $currentLen;
                continue;
            }

            // Re-scan the source at this token's position for the full literal.
            $found = preg_match(self::NUMBER, $code, $groups, 0, $offset);
            assert($found, "No number at number token position");

            $literal = $groups[0];
            $literalLen = \strlen($literal);
            if ($literalLen === $currentLen) {
                // Nothing to merge: the token is already the complete literal.
                $offset += $currentLen;
                continue;
            }

            $kind = $this->resolveIntegerOrFloatToken($literal);
            $replacement = [[$kind, $literal, $current[2]]];

            // Swallow following tokens until the whole literal is covered.
            $consumed = 1;
            $covered = $currentLen;
            while ($covered < $literalLen) {
                $following = $tokens[$pos + $consumed];
                $followingText = \is_array($following) ? $following[1] : $following;
                $followingLen = \strlen($followingText);
                $consumed++;

                if ($covered + $followingLen > $literalLen) {
                    // The literal ends inside this token: keep its tail as a
                    // separate token of the same kind.
                    assert(is_array($following), "Partial token should be an array token");
                    $tail = substr($followingText, $literalLen - $covered);
                    $replacement[] = [$following[0], $tail, $following[2]];
                    break;
                }

                $covered += $followingLen;
            }

            array_splice($tokens, $pos, $consumed, $replacement);
            // Shrink the bound by the net number of tokens removed.
            $total -= $consumed - \count($replacement);
            $offset += $literalLen;
        }

        return $tokens;
    }

    /**
     * Decides whether a literal (with separators) is an integer or a float
     * token, by evaluating it with the separators stripped.
     *
     * @param string $str Numeric literal as matched by self::NUMBER
     *
     * @return int T_DNUMBER if the evaluated value is a float, otherwise T_LNUMBER
     */
    private function resolveIntegerOrFloatToken(string $str): int
    {
        $plain = str_replace('_', '', $str);

        if (stripos($plain, '0b') === 0) {
            $value = bindec($plain);
        } elseif (stripos($plain, '0x') === 0) {
            $value = hexdec($plain);
        } elseif (stripos($plain, '0') === 0 && ctype_digit($plain)) {
            // Leading zero followed only by digits: octal notation.
            $value = octdec($plain);
        } else {
            $value = +$plain;
        }

        if (is_float($value)) {
            return T_DNUMBER;
        }

        return T_LNUMBER;
    }

    /**
     * No-op: returns the tokens unchanged.
     *
     * @param string $code   Source code (unused)
     * @param array  $tokens Token list
     *
     * @return array The same token list
     */
    public function reverseEmulate(string $code, array $tokens): array
    {
        // Numeric separators were not legal code previously, don't bother.
        return $tokens;
    }
}
106