1<?php
2
3namespace Egulias\EmailValidator\Parser;
4
5use Egulias\EmailValidator\EmailLexer;
6use Egulias\EmailValidator\Exception\CharNotAllowed;
7use Egulias\EmailValidator\Exception\CommaInDomain;
8use Egulias\EmailValidator\Exception\ConsecutiveAt;
9use Egulias\EmailValidator\Exception\CRLFAtTheEnd;
10use Egulias\EmailValidator\Exception\CRNoLF;
11use Egulias\EmailValidator\Exception\DomainHyphened;
12use Egulias\EmailValidator\Exception\DotAtEnd;
13use Egulias\EmailValidator\Exception\DotAtStart;
14use Egulias\EmailValidator\Exception\ExpectingATEXT;
15use Egulias\EmailValidator\Exception\ExpectingDomainLiteralClose;
16use Egulias\EmailValidator\Exception\ExpectingDTEXT;
17use Egulias\EmailValidator\Exception\NoDomainPart;
18use Egulias\EmailValidator\Exception\UnopenedComment;
19use Egulias\EmailValidator\Warning\AddressLiteral;
20use Egulias\EmailValidator\Warning\CFWSWithFWS;
21use Egulias\EmailValidator\Warning\DeprecatedComment;
22use Egulias\EmailValidator\Warning\DomainLiteral;
23use Egulias\EmailValidator\Warning\DomainTooLong;
24use Egulias\EmailValidator\Warning\IPV6BadChar;
25use Egulias\EmailValidator\Warning\IPV6ColonEnd;
26use Egulias\EmailValidator\Warning\IPV6ColonStart;
27use Egulias\EmailValidator\Warning\IPV6Deprecated;
28use Egulias\EmailValidator\Warning\IPV6DoubleColon;
29use Egulias\EmailValidator\Warning\IPV6GroupCount;
30use Egulias\EmailValidator\Warning\IPV6MaxGroups;
31use Egulias\EmailValidator\Warning\LabelTooLong;
32use Egulias\EmailValidator\Warning\ObsoleteDTEXT;
33use Egulias\EmailValidator\Warning\TLD;
34
35class DomainPart extends Parser
36{
    /**
     * Largest byte length (as measured by strlen()) the parsed domain may
     * have before parse() records a DomainTooLong warning.
     */
    const DOMAIN_MAX_LENGTH = 254;

    /**
     * Largest byte length an ASCII-only label may have before
     * isLabelTooLong() reports it as too long.
     */
    const LABEL_MAX_LENGTH = 63;

    /**
     * Domain text stored by parse() once all of its checks have passed.
     *
     * @var string
     */
    protected $domainPart = '';
44
45    public function parse($domainPart)
46    {
47        $this->lexer->moveNext();
48
49        $this->performDomainStartChecks();
50
51        $domain = $this->doParseDomainPart();
52
53        $prev = $this->lexer->getPrevious();
54        $length = strlen($domain);
55
56        if ($prev['type'] === EmailLexer::S_DOT) {
57            throw new DotAtEnd();
58        }
59        if ($prev['type'] === EmailLexer::S_HYPHEN) {
60            throw new DomainHyphened();
61        }
62        if ($length > self::DOMAIN_MAX_LENGTH) {
63            $this->warnings[DomainTooLong::CODE] = new DomainTooLong();
64        }
65        if ($prev['type'] === EmailLexer::S_CR) {
66            throw new CRLFAtTheEnd();
67        }
68        $this->domainPart = $domain;
69    }
70
71    private function performDomainStartChecks()
72    {
73        $this->checkInvalidTokensAfterAT();
74        $this->checkEmptyDomain();
75
76        if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
77            $this->warnings[DeprecatedComment::CODE] = new DeprecatedComment();
78            $this->parseDomainComments();
79        }
80    }
81
82    private function checkEmptyDomain()
83    {
84        $thereIsNoDomain = $this->lexer->token['type'] === EmailLexer::S_EMPTY ||
85            ($this->lexer->token['type'] === EmailLexer::S_SP &&
86            !$this->lexer->isNextToken(EmailLexer::GENERIC));
87
88        if ($thereIsNoDomain) {
89            throw new NoDomainPart();
90        }
91    }
92
93    private function checkInvalidTokensAfterAT()
94    {
95        if ($this->lexer->token['type'] === EmailLexer::S_DOT) {
96            throw new DotAtStart();
97        }
98        if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN) {
99            throw new DomainHyphened();
100        }
101    }
102
    /**
     * Returns the domain text stored by parse(); an empty string until
     * parse() has completed without throwing.
     *
     * @return string
     */
    public function getDomainPart()
    {
        return $this->domainPart;
    }
110
111    /**
112     * @param string $addressLiteral
113     * @param int $maxGroups
114     */
115    public function checkIPV6Tag($addressLiteral, $maxGroups = 8)
116    {
117        $prev = $this->lexer->getPrevious();
118        if ($prev['type'] === EmailLexer::S_COLON) {
119            $this->warnings[IPV6ColonEnd::CODE] = new IPV6ColonEnd();
120        }
121
122        $IPv6       = substr($addressLiteral, 5);
123        //Daniel Marschall's new IPv6 testing strategy
124        $matchesIP  = explode(':', $IPv6);
125        $groupCount = count($matchesIP);
126        $colons     = strpos($IPv6, '::');
127
128        if (count(preg_grep('/^[0-9A-Fa-f]{0,4}$/', $matchesIP, PREG_GREP_INVERT)) !== 0) {
129            $this->warnings[IPV6BadChar::CODE] = new IPV6BadChar();
130        }
131
132        if ($colons === false) {
133            // We need exactly the right number of groups
134            if ($groupCount !== $maxGroups) {
135                $this->warnings[IPV6GroupCount::CODE] = new IPV6GroupCount();
136            }
137            return;
138        }
139
140        if ($colons !== strrpos($IPv6, '::')) {
141            $this->warnings[IPV6DoubleColon::CODE] = new IPV6DoubleColon();
142            return;
143        }
144
145        if ($colons === 0 || $colons === (strlen($IPv6) - 2)) {
146            // RFC 4291 allows :: at the start or end of an address
147            //with 7 other groups in addition
148            ++$maxGroups;
149        }
150
151        if ($groupCount > $maxGroups) {
152            $this->warnings[IPV6MaxGroups::CODE] = new IPV6MaxGroups();
153        } elseif ($groupCount === $maxGroups) {
154            $this->warnings[IPV6Deprecated::CODE] = new IPV6Deprecated();
155        }
156    }
157
    /**
     * Walks the remaining tokens and assembles the domain string.
     *
     * Each iteration validates the current token, deals with comments, domain
     * literals and folding white space, appends the token value to the domain
     * and advances the lexer. The loop stops once the current token type is
     * null. Label lengths are checked at every dot and once more after the
     * loop for the final label.
     *
     * @return string The concatenated values of the tokens that were consumed.
     *
     * @throws UnopenedComment When a closing parenthesis has no matching opener.
     * @throws CharNotAllowed  When the token following a consumed token is S_SP.
     */
    protected function doParseDomainPart()
    {
        $domain = '';
        $label = '';
        // Running balance of comment parentheses that are still open.
        $openedParenthesis = 0;
        do {
            // Captured before anything below advances the lexer; used by
            // checkDomainPartExceptions().
            $prev = $this->lexer->getPrevious();

            $this->checkNotAllowedChars($this->lexer->token);

            if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
                $this->parseComments();
                $openedParenthesis += $this->getOpenedParenthesis();
                $this->lexer->moveNext();
                // If the token just stepped over was a closing parenthesis,
                // one of the opened comments has been closed.
                $tmpPrev = $this->lexer->getPrevious();
                if ($tmpPrev['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
                    $openedParenthesis--;
                }
            }
            if ($this->lexer->token['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
                if ($openedParenthesis === 0) {
                    throw new UnopenedComment();
                } else {
                    $openedParenthesis--;
                }
            }

            $this->checkConsecutiveDots();
            $this->checkDomainPartExceptions($prev);

            if ($this->hasBrackets()) {
                $this->parseDomainLiteral();
            }

            // A dot terminates the current label; any other token extends it.
            if ($this->lexer->token['type'] === EmailLexer::S_DOT) {
                $this->checkLabelLength($label);
                $label = '';
            } else {
                $label .= $this->lexer->token['value'];
            }

            if ($this->isFWS()) {
                $this->parseFWS();
            }

            $domain .= $this->lexer->token['value'];
            $this->lexer->moveNext();
            // A space token directly after a consumed token is rejected.
            if ($this->lexer->token['type'] === EmailLexer::S_SP) {
                throw new CharNotAllowed();
            }
        } while (null !== $this->lexer->token['type']);

        // The last label is not followed by a dot, so check it here.
        $this->checkLabelLength($label);

        return $domain;
    }
217
218    private function checkNotAllowedChars(array $token)
219    {
220        $notAllowed = [EmailLexer::S_BACKSLASH => true, EmailLexer::S_SLASH=> true];
221        if (isset($notAllowed[$token['type']])) {
222            throw new CharNotAllowed();
223        }
224    }
225
226    /**
227     * @return string|false
228     */
229    protected function parseDomainLiteral()
230    {
231        if ($this->lexer->isNextToken(EmailLexer::S_COLON)) {
232            $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
233        }
234        if ($this->lexer->isNextToken(EmailLexer::S_IPV6TAG)) {
235            $lexer = clone $this->lexer;
236            $lexer->moveNext();
237            if ($lexer->isNextToken(EmailLexer::S_DOUBLECOLON)) {
238                $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
239            }
240        }
241
242        return $this->doParseDomainLiteral();
243    }
244
245    /**
246     * @return string|false
247     */
248    protected function doParseDomainLiteral()
249    {
250        $IPv6TAG = false;
251        $addressLiteral = '';
252        do {
253            if ($this->lexer->token['type'] === EmailLexer::C_NUL) {
254                throw new ExpectingDTEXT();
255            }
256
257            if ($this->lexer->token['type'] === EmailLexer::INVALID ||
258                $this->lexer->token['type'] === EmailLexer::C_DEL   ||
259                $this->lexer->token['type'] === EmailLexer::S_LF
260            ) {
261                $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
262            }
263
264            if ($this->lexer->isNextTokenAny(array(EmailLexer::S_OPENQBRACKET, EmailLexer::S_OPENBRACKET))) {
265                throw new ExpectingDTEXT();
266            }
267
268            if ($this->lexer->isNextTokenAny(
269                array(EmailLexer::S_HTAB, EmailLexer::S_SP, $this->lexer->token['type'] === EmailLexer::CRLF)
270            )) {
271                $this->warnings[CFWSWithFWS::CODE] = new CFWSWithFWS();
272                $this->parseFWS();
273            }
274
275            if ($this->lexer->isNextToken(EmailLexer::S_CR)) {
276                throw new CRNoLF();
277            }
278
279            if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH) {
280                $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
281                $addressLiteral .= $this->lexer->token['value'];
282                $this->lexer->moveNext();
283                $this->validateQuotedPair();
284            }
285            if ($this->lexer->token['type'] === EmailLexer::S_IPV6TAG) {
286                $IPv6TAG = true;
287            }
288            if ($this->lexer->token['type'] === EmailLexer::S_CLOSEQBRACKET) {
289                break;
290            }
291
292            $addressLiteral .= $this->lexer->token['value'];
293
294        } while ($this->lexer->moveNext());
295
296        $addressLiteral = str_replace('[', '', $addressLiteral);
297        $addressLiteral = $this->checkIPV4Tag($addressLiteral);
298
299        if (false === $addressLiteral) {
300            return $addressLiteral;
301        }
302
303        if (!$IPv6TAG) {
304            $this->warnings[DomainLiteral::CODE] = new DomainLiteral();
305            return $addressLiteral;
306        }
307
308        $this->warnings[AddressLiteral::CODE] = new AddressLiteral();
309
310        $this->checkIPV6Tag($addressLiteral);
311
312        return $addressLiteral;
313    }
314
315    /**
316     * @param string $addressLiteral
317     *
318     * @return string|false
319     */
320    protected function checkIPV4Tag($addressLiteral)
321    {
322        $matchesIP  = array();
323
324        // Extract IPv4 part from the end of the address-literal (if there is one)
325        if (preg_match(
326            '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
327            $addressLiteral,
328            $matchesIP
329        ) > 0
330        ) {
331            $index = strrpos($addressLiteral, $matchesIP[0]);
332            if ($index === 0) {
333                $this->warnings[AddressLiteral::CODE] = new AddressLiteral();
334                return false;
335            }
336            // Convert IPv4 part to IPv6 format for further testing
337            $addressLiteral = substr($addressLiteral, 0, (int) $index) . '0:0';
338        }
339
340        return $addressLiteral;
341    }
342
343    protected function checkDomainPartExceptions(array $prev)
344    {
345        $invalidDomainTokens = array(
346            EmailLexer::S_DQUOTE => true,
347            EmailLexer::S_SQUOTE => true,
348            EmailLexer::S_BACKTICK => true,
349            EmailLexer::S_SEMICOLON => true,
350            EmailLexer::S_GREATERTHAN => true,
351            EmailLexer::S_LOWERTHAN => true,
352        );
353
354        if (isset($invalidDomainTokens[$this->lexer->token['type']])) {
355            throw new ExpectingATEXT();
356        }
357
358        if ($this->lexer->token['type'] === EmailLexer::S_COMMA) {
359            throw new CommaInDomain();
360        }
361
362        if ($this->lexer->token['type'] === EmailLexer::S_AT) {
363            throw new ConsecutiveAt();
364        }
365
366        if ($this->lexer->token['type'] === EmailLexer::S_OPENQBRACKET && $prev['type'] !== EmailLexer::S_AT) {
367            throw new ExpectingATEXT();
368        }
369
370        if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN && $this->lexer->isNextToken(EmailLexer::S_DOT)) {
371            throw new DomainHyphened();
372        }
373
374        if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH
375            && $this->lexer->isNextToken(EmailLexer::GENERIC)) {
376            throw new ExpectingATEXT();
377        }
378    }
379
380    /**
381     * @return bool
382     */
383    protected function hasBrackets()
384    {
385        if ($this->lexer->token['type'] !== EmailLexer::S_OPENBRACKET) {
386            return false;
387        }
388
389        try {
390            $this->lexer->find(EmailLexer::S_CLOSEBRACKET);
391        } catch (\RuntimeException $e) {
392            throw new ExpectingDomainLiteralClose();
393        }
394
395        return true;
396    }
397
398    /**
399     * @param string $label
400     */
401    protected function checkLabelLength($label)
402    {
403        if ($this->isLabelTooLong($label)) {
404            $this->warnings[LabelTooLong::CODE] = new LabelTooLong();
405        }
406    }
407
408    /**
409     * @param string $label
410     * @return bool
411     */
412    private function isLabelTooLong($label)
413    {
414        if (preg_match('/[^\x00-\x7F]/', $label)) {
415            idn_to_ascii($label, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46, $idnaInfo);
416
417            return (bool) ($idnaInfo['errors'] & IDNA_ERROR_LABEL_TOO_LONG);
418        }
419
420        return strlen($label) > self::LABEL_MAX_LENGTH;
421    }
422
423    protected function parseDomainComments()
424    {
425        $this->isUnclosedComment();
426        while (!$this->lexer->isNextToken(EmailLexer::S_CLOSEPARENTHESIS)) {
427            $this->warnEscaping();
428            $this->lexer->moveNext();
429        }
430
431        $this->lexer->moveNext();
432        if ($this->lexer->isNextToken(EmailLexer::S_DOT)) {
433            throw new ExpectingATEXT();
434        }
435    }
436
437    protected function addTLDWarnings()
438    {
439        if ($this->warnings[DomainLiteral::CODE]) {
440            $this->warnings[TLD::CODE] = new TLD();
441        }
442    }
443}
444