1<?php
2
3namespace Egulias\EmailValidator;
4
5use Doctrine\Common\Lexer\AbstractLexer;
6
/**
 * Lexer that splits an e-mail address string into typed tokens.
 *
 * Every token produced by the parent lexer is classified by {@see getType()}
 * into one of the integer constants declared below. The class additionally
 * remembers the previously consumed token and records whether any token was
 * classified as {@see EmailLexer::INVALID}.
 */
class EmailLexer extends AbstractLexer
{
    // Token type identifiers.
    //
    // The values up to 127 are meant to mirror the ASCII code of the character
    // they stand for; the values from 261 upwards are arbitrary identifiers.
    // Callers should always compare against the constant, never the number.
    const C_DEL              = 127;
    const C_NUL              = 0;
    const S_AT               = 64;
    const S_BACKSLASH        = 92;
    const S_DOT              = 46;
    const S_DQUOTE           = 34;
    const S_SQUOTE           = 39;
    const S_BACKTICK         = 96;
    // NOTE: 49 is not the ASCII code of "(" (that is 40). The value is kept
    // as-is so that the public constant does not change; it is only ever used
    // as an opaque identifier inside this class.
    const S_OPENPARENTHESIS  = 49;
    const S_CLOSEPARENTHESIS = 261;
    const S_OPENBRACKET      = 262;
    const S_CLOSEBRACKET     = 263;
    const S_HYPHEN           = 264;
    const S_COLON            = 265;
    const S_DOUBLECOLON      = 266;
    const S_SP               = 267;
    const S_HTAB             = 268;
    const S_CR               = 269;
    const S_LF               = 270;
    const S_IPV6TAG          = 271;
    const S_LOWERTHAN        = 272;
    const S_GREATERTHAN      = 273;
    const S_COMMA            = 274;
    const S_SEMICOLON        = 275;
    const S_OPENQBRACKET     = 276;
    const S_CLOSEQBRACKET    = 277;
    const S_SLASH            = 278;
    const S_EMPTY            = null;

    // Fallback classifications assigned by getType().
    const GENERIC            = 300;
    const CRLF               = 301;
    const INVALID            = 302;

    // Not referenced anywhere inside this class; presumably consumed by
    // callers elsewhere in the library.
    const ASCII_INVALID_FROM = 127;
    const ASCII_INVALID_TO   = 199;

    /**
     * Maps a token's literal text to its token type.
     *
     * Covers the US-ASCII visible characters not valid for atext
     * (@link http://tools.ietf.org/html/rfc5322#section-3.2.3) plus the
     * whitespace, CRLF, "::" and "IPv6" tokens.
     *
     * The '' entry maps to null (S_EMPTY), so isset() never reports it as
     * present and {@see isValid()} returns false for the empty string.
     *
     * @var array
     */
    protected $charValue = [
        '('    => self::S_OPENPARENTHESIS,
        ')'    => self::S_CLOSEPARENTHESIS,
        '<'    => self::S_LOWERTHAN,
        '>'    => self::S_GREATERTHAN,
        '['    => self::S_OPENBRACKET,
        ']'    => self::S_CLOSEBRACKET,
        ':'    => self::S_COLON,
        ';'    => self::S_SEMICOLON,
        '@'    => self::S_AT,
        '\\'   => self::S_BACKSLASH,
        '/'    => self::S_SLASH,
        ','    => self::S_COMMA,
        '.'    => self::S_DOT,
        "'"    => self::S_SQUOTE,
        "`"    => self::S_BACKTICK,
        '"'    => self::S_DQUOTE,
        '-'    => self::S_HYPHEN,
        '::'   => self::S_DOUBLECOLON,
        ' '    => self::S_SP,
        "\t"   => self::S_HTAB,
        "\r"   => self::S_CR,
        "\n"   => self::S_LF,
        "\r\n" => self::CRLF,
        'IPv6' => self::S_IPV6TAG,
        '{'    => self::S_OPENQBRACKET,
        '}'    => self::S_CLOSEQBRACKET,
        ''     => self::S_EMPTY,
        // Must be double-quoted: '\0' in single quotes is the two-character
        // string backslash + "0", not the NUL byte.
        "\0"   => self::C_NUL,
    ];

    /**
     * True once getType() has classified at least one token as INVALID.
     *
     * @var bool
     */
    protected $hasInvalidTokens = false;

    /**
     * The token that was current before the most recent moveNext() call.
     *
     * @var array
     *
     * @psalm-var array{value:string, type:null|int, position:int}|array<empty, empty>
     */
    protected $previous = [];

    /**
     * The last matched/seen token.
     *
     * @var array
     *
     * @psalm-var array{value:string, type:null|int, position:int}
     */
    public $token;

    /**
     * The next token in the input.
     *
     * @var array|null
     */
    public $lookahead;

    /**
     * Placeholder used for $token / $previous when no real token is available.
     *
     * @psalm-var array{value:'', type:null, position:0}
     */
    private static $nullToken = [
        'value' => '',
        'type' => null,
        'position' => 0,
    ];

    /**
     * Starts with the placeholder token as both current and previous token
     * and with no lookahead.
     */
    public function __construct()
    {
        $this->previous = $this->token = self::$nullToken;
        $this->lookahead = null;
    }

    /**
     * Clears the invalid-token flag, resets the parent lexer and restores the
     * placeholder token as both current and previous token.
     *
     * @return void
     */
    public function reset()
    {
        $this->hasInvalidTokens = false;
        parent::reset();
        // Assigned after parent::reset() so the placeholder is the final state
        // regardless of what the parent stored in $token.
        $this->previous = $this->token = self::$nullToken;
    }

    /**
     * Tells whether any token has been classified as INVALID since the last
     * reset().
     *
     * @return bool
     */
    public function hasInvalidTokens()
    {
        return $this->hasInvalidTokens;
    }

    /**
     * Checks that a token of the given type lies ahead, without moving this
     * lexer.
     *
     * @param int $type
     * @throws \UnexpectedValueException when no such token is found
     * @return boolean always true when it returns
     *
     * @psalm-suppress InvalidScalarArgument
     */
    public function find($type)
    {
        // Search on a clone so the position of this lexer is left untouched.
        $search = clone $this;
        $search->skipUntil($type);

        if (!$search->lookahead) {
            throw new \UnexpectedValueException($type . ' not found');
        }

        return true;
    }

    /**
     * Returns the token that was current before the last moveNext() call.
     *
     * @return array
     */
    public function getPrevious()
    {
        return $this->previous;
    }

    /**
     * Advances to the next token, remembering the current one as "previous".
     *
     * @return boolean the value returned by the parent's moveNext()
     */
    public function moveNext()
    {
        $this->previous = $this->token;
        $hasNext = parent::moveNext();
        // Never expose an empty token: fall back to the placeholder when the
        // parent left $token empty (presumably null at the input boundaries).
        $this->token = $this->token ?: self::$nullToken;

        return $hasNext;
    }

    /**
     * Lexical catchable patterns.
     *
     * @return string[]
     */
    protected function getCatchablePatterns()
    {
        return [
            '[a-zA-Z_]+[46]?', // letters/underscores, optionally ending in 4 or 6 (e.g. "IPv6")
            '[^\x00-\x7F]',    // a single character outside the ASCII range
            '[0-9]+',          // a run of digits
            '\r\n',            // CRLF as one token
            '::',              // double colon as one token
            '\s+?',            // lazy quantifier: one whitespace character at a time
            '.',               // any other single character
        ];
    }

    /**
     * Lexical non-catchable patterns.
     *
     * @return string[]
     */
    protected function getNonCatchablePatterns()
    {
        return ['[\xA0-\xff]+'];
    }

    /**
     * Retrieve token type.
     *
     * Classification order: NUL byte, then any text listed in $charValue,
     * then text containing a control character (INVALID, which also raises
     * the hasInvalidTokens flag), and finally GENERIC for everything else.
     *
     * @param string $value token text; taken by reference but not modified here
     * @return integer
     */
    protected function getType(&$value)
    {
        if ($this->isNullType($value)) {
            return self::C_NUL;
        }

        if ($this->isValid($value)) {
            return $this->charValue[$value];
        }

        if ($this->isUTF8Invalid($value)) {
            $this->hasInvalidTokens = true;

            return self::INVALID;
        }

        return self::GENERIC;
    }

    /**
     * Tells whether the token text has a non-null entry in $charValue.
     *
     * @param string $value
     *
     * @return bool
     */
    protected function isValid($value)
    {
        return isset($this->charValue[$value]);
    }

    /**
     * Tells whether the token text is exactly one NUL byte.
     *
     * @param string $value
     * @return bool
     */
    protected function isNullType($value)
    {
        return $value === "\0";
    }

    /**
     * Tells whether the token text contains a Unicode control character
     * (general category Cc).
     *
     * Despite the name, this does not validate the UTF-8 encoding itself: if
     * preg_match() fails and returns false, the result is false.
     *
     * @param string $value
     * @return bool
     */
    protected function isUTF8Invalid($value)
    {
        return (bool) preg_match('/\p{Cc}+/u', $value);
    }

    /**
     * Regex modifiers for the token patterns: "i" (case-insensitive) and
     * "u" (treat pattern and subject as UTF-8).
     *
     * @return string
     */
    protected function getModifiers()
    {
        return 'iu';
    }
}
284