1<?php
2
3namespace Egulias\EmailValidator\Parser;
4
5use Egulias\EmailValidator\EmailLexer;
6use Egulias\EmailValidator\Exception\CharNotAllowed;
7use Egulias\EmailValidator\Exception\CommaInDomain;
8use Egulias\EmailValidator\Exception\ConsecutiveAt;
9use Egulias\EmailValidator\Exception\CRLFAtTheEnd;
10use Egulias\EmailValidator\Exception\CRNoLF;
11use Egulias\EmailValidator\Exception\DomainHyphened;
12use Egulias\EmailValidator\Exception\DotAtEnd;
13use Egulias\EmailValidator\Exception\DotAtStart;
14use Egulias\EmailValidator\Exception\ExpectingATEXT;
15use Egulias\EmailValidator\Exception\ExpectingDomainLiteralClose;
16use Egulias\EmailValidator\Exception\ExpectingDTEXT;
17use Egulias\EmailValidator\Exception\NoDomainPart;
18use Egulias\EmailValidator\Exception\UnopenedComment;
19use Egulias\EmailValidator\Warning\AddressLiteral;
20use Egulias\EmailValidator\Warning\CFWSWithFWS;
21use Egulias\EmailValidator\Warning\DeprecatedComment;
22use Egulias\EmailValidator\Warning\DomainLiteral;
23use Egulias\EmailValidator\Warning\DomainTooLong;
24use Egulias\EmailValidator\Warning\IPV6BadChar;
25use Egulias\EmailValidator\Warning\IPV6ColonEnd;
26use Egulias\EmailValidator\Warning\IPV6ColonStart;
27use Egulias\EmailValidator\Warning\IPV6Deprecated;
28use Egulias\EmailValidator\Warning\IPV6DoubleColon;
29use Egulias\EmailValidator\Warning\IPV6GroupCount;
30use Egulias\EmailValidator\Warning\IPV6MaxGroups;
31use Egulias\EmailValidator\Warning\LabelTooLong;
32use Egulias\EmailValidator\Warning\ObsoleteDTEXT;
33use Egulias\EmailValidator\Warning\TLD;
34
/**
 * Parses and validates the domain part of an e-mail address (the tokens that
 * follow the "@").
 *
 * Fatal problems are reported by throwing one of the imported exception
 * classes; non-fatal findings are stored in $this->warnings, keyed by the
 * warning class' CODE constant.
 *
 * NOTE(review): $this->lexer, $this->warnings and helpers such as parseFWS(),
 * isFWS(), parseComments(), getOpenedParenthesis(), checkConsecutiveDots(),
 * validateQuotedPair(), isUnclosedComment() and warnEscaping() are not defined
 * in this class; they are presumably inherited from Parser.
 */
class DomainPart extends Parser
{
    /**
     * Byte length above which a DomainTooLong warning is recorded.
     */
    const DOMAIN_MAX_LENGTH = 254;

    /**
     * The domain text assembled by the last successful call to parse().
     *
     * @var string
     */
    protected $domainPart = '';

    /**
     * Parses the domain from the lexer's token stream and stores the result
     * so it can be read back through getDomainPart().
     *
     * @param mixed $domainPart Not read by this method (presumably the raw
     *                          domain string); tokens come from $this->lexer.
     *
     * @throws DotAtEnd       when the last consumed token is a dot
     * @throws DomainHyphened when the last consumed token is a hyphen
     * @throws CRLFAtTheEnd   when the last consumed token is a carriage return
     */
    public function parse($domainPart)
    {
        $this->lexer->moveNext();

        $this->performDomainStartChecks();

        $domain = $this->doParseDomainPart();

        // After the main loop the "previous" token is the last one of the domain.
        $prev = $this->lexer->getPrevious();
        $length = strlen($domain);

        if ($prev['type'] === EmailLexer::S_DOT) {
            throw new DotAtEnd();
        }
        if ($prev['type'] === EmailLexer::S_HYPHEN) {
            throw new DomainHyphened();
        }
        if ($length > self::DOMAIN_MAX_LENGTH) {
            $this->warnings[DomainTooLong::CODE] = new DomainTooLong();
        }
        if ($prev['type'] === EmailLexer::S_CR) {
            throw new CRLFAtTheEnd();
        }
        $this->domainPart = $domain;
    }

    /**
     * Runs the checks that apply to the very first token of the domain and,
     * when the domain opens with a comment, consumes that comment.
     */
    private function performDomainStartChecks()
    {
        $this->checkInvalidTokensAfterAT();
        $this->checkEmptyDomain();

        if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
            $this->warnings[DeprecatedComment::CODE] = new DeprecatedComment();
            $this->parseDomainComments();
        }
    }

    /**
     * Rejects an address that has nothing usable after the "@".
     *
     * The domain counts as missing when the current token is S_EMPTY, or when
     * it is a space that is not followed by a GENERIC token.
     *
     * @throws NoDomainPart
     */
    private function checkEmptyDomain()
    {
        $thereIsNoDomain = $this->lexer->token['type'] === EmailLexer::S_EMPTY ||
            ($this->lexer->token['type'] === EmailLexer::S_SP &&
            !$this->lexer->isNextToken(EmailLexer::GENERIC));

        if ($thereIsNoDomain) {
            throw new NoDomainPart();
        }
    }

    /**
     * Rejects a domain whose first token is a dot or a hyphen.
     *
     * @throws DotAtStart
     * @throws DomainHyphened
     */
    private function checkInvalidTokensAfterAT()
    {
        if ($this->lexer->token['type'] === EmailLexer::S_DOT) {
            throw new DotAtStart();
        }
        if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN) {
            throw new DomainHyphened();
        }
    }

    /**
     * Returns the domain stored by parse(), or '' when nothing was stored yet.
     *
     * @return string
     */
    public function getDomainPart()
    {
        return $this->domainPart;
    }

    /**
     * Inspects the IPv6 portion of an address literal and records warnings
     * for malformed input. Nothing is returned and nothing is thrown here.
     *
     * @param string $addressLiteral Literal text; its first five bytes are
     *                               skipped (presumably the "IPv6:" tag).
     * @param int    $maxGroups      Expected number of colon-separated groups.
     */
    public function checkIPV6Tag($addressLiteral, $maxGroups = 8)
    {
        $prev = $this->lexer->getPrevious();
        if ($prev['type'] === EmailLexer::S_COLON) {
            $this->warnings[IPV6ColonEnd::CODE] = new IPV6ColonEnd();
        }

        $IPv6       = substr($addressLiteral, 5);
        // Daniel Marschall's new IPv6 testing strategy
        $matchesIP  = explode(':', $IPv6);
        $groupCount = count($matchesIP);
        $colons     = strpos($IPv6, '::');

        // Any group that is not 0-4 hexadecimal digits is a bad character warning.
        if (count(preg_grep('/^[0-9A-Fa-f]{0,4}$/', $matchesIP, PREG_GREP_INVERT)) !== 0) {
            $this->warnings[IPV6BadChar::CODE] = new IPV6BadChar();
        }

        if ($colons === false) {
            // We need exactly the right number of groups
            if ($groupCount !== $maxGroups) {
                $this->warnings[IPV6GroupCount::CODE] = new IPV6GroupCount();
            }
            return;
        }

        // First and last "::" at different offsets means there is more than one.
        if ($colons !== strrpos($IPv6, '::')) {
            $this->warnings[IPV6DoubleColon::CODE] = new IPV6DoubleColon();
            return;
        }

        if ($colons === 0 || $colons === (strlen($IPv6) - 2)) {
            // RFC 4291 allows :: at the start or end of an address
            // with 7 other groups in addition
            ++$maxGroups;
        }

        if ($groupCount > $maxGroups) {
            $this->warnings[IPV6MaxGroups::CODE] = new IPV6MaxGroups();
        } elseif ($groupCount === $maxGroups) {
            $this->warnings[IPV6Deprecated::CODE] = new IPV6Deprecated();
        }
    }

    /**
     * Walks the token stream until the lexer has no current token left,
     * validating each token and concatenating the token values.
     *
     * @return string The concatenated values of the consumed tokens.
     *
     * @throws UnopenedComment when ")" appears with no comment open
     */
    protected function doParseDomainPart()
    {
        $domain = '';
        $openedParenthesis = 0;
        do {
            $prev = $this->lexer->getPrevious();

            $this->checkNotAllowedChars($this->lexer->token);

            if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
                $this->parseComments();
                $openedParenthesis += $this->getOpenedParenthesis();
                $this->lexer->moveNext();
                // The token just stepped over closed a comment: balance the counter.
                $tmpPrev = $this->lexer->getPrevious();
                if ($tmpPrev['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
                    $openedParenthesis--;
                }
            }
            if ($this->lexer->token['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
                if ($openedParenthesis === 0) {
                    throw new UnopenedComment();
                } else {
                    $openedParenthesis--;
                }
            }

            $this->checkConsecutiveDots();
            $this->checkDomainPartExceptions($prev);

            if ($this->hasBrackets()) {
                // Only warnings/exceptions matter here; the returned literal is not used.
                $this->parseDomainLiteral();
            }

            $this->checkLabelLength($prev);

            if ($this->isFWS()) {
                $this->parseFWS();
            }

            $domain .= $this->lexer->token['value'];
            $this->lexer->moveNext();
        } while (null !== $this->lexer->token['type']);

        return $domain;
    }

    /**
     * Rejects backslash and slash tokens.
     *
     * @param array $token Lexer token; only its 'type' entry is read.
     *
     * @throws CharNotAllowed
     */
    private function checkNotAllowedChars(array $token)
    {
        $notAllowed = [EmailLexer::S_BACKSLASH => true, EmailLexer::S_SLASH => true];
        if (isset($notAllowed[$token['type']])) {
            throw new CharNotAllowed();
        }
    }

    /**
     * Records an IPV6ColonStart warning when the literal begins with a colon
     * (directly, or as "::" right after the IPv6 tag), then parses the literal.
     *
     * @return string|false See doParseDomainLiteral().
     */
    protected function parseDomainLiteral()
    {
        if ($this->lexer->isNextToken(EmailLexer::S_COLON)) {
            $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
        }
        if ($this->lexer->isNextToken(EmailLexer::S_IPV6TAG)) {
            // Look one token further ahead on a copy so the real lexer keeps its position.
            $lexer = clone $this->lexer;
            $lexer->moveNext();
            if ($lexer->isNextToken(EmailLexer::S_DOUBLECOLON)) {
                $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
            }
        }

        return $this->doParseDomainLiteral();
    }

    /**
     * Consumes the tokens of a domain literal, collecting their values and
     * recording warnings along the way, then classifies the collected text.
     *
     * @return string|false The collected literal with "[" removed and a
     *                      trailing IPv4 address rewritten by checkIPV4Tag(),
     *                      or false when the literal is only an IPv4 address.
     *
     * @throws ExpectingDTEXT on a NUL token or when an opening bracket follows
     * @throws CRNoLF         when the next token is a bare carriage return
     */
    protected function doParseDomainLiteral()
    {
        $IPv6TAG = false;
        $addressLiteral = '';
        do {
            if ($this->lexer->token['type'] === EmailLexer::C_NUL) {
                throw new ExpectingDTEXT();
            }

            if ($this->lexer->token['type'] === EmailLexer::INVALID ||
                $this->lexer->token['type'] === EmailLexer::C_DEL   ||
                $this->lexer->token['type'] === EmailLexer::S_LF
            ) {
                $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
            }

            if ($this->lexer->isNextTokenAny([EmailLexer::S_OPENQBRACKET, EmailLexer::S_OPENBRACKET])) {
                throw new ExpectingDTEXT();
            }

            // Folding white space may start with WSP or with CRLF (RFC 5322, 3.2.2).
            // The list previously held a boolean comparison in place of the CRLF
            // token type, which could never match a token type.
            if ($this->lexer->isNextTokenAny(
                [EmailLexer::S_HTAB, EmailLexer::S_SP, EmailLexer::CRLF]
            )) {
                $this->warnings[CFWSWithFWS::CODE] = new CFWSWithFWS();
                $this->parseFWS();
            }

            if ($this->lexer->isNextToken(EmailLexer::S_CR)) {
                throw new CRNoLF();
            }

            if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH) {
                // Keep the backslash, step onto the escaped token and validate the pair.
                $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
                $addressLiteral .= $this->lexer->token['value'];
                $this->lexer->moveNext();
                $this->validateQuotedPair();
            }
            if ($this->lexer->token['type'] === EmailLexer::S_IPV6TAG) {
                $IPv6TAG = true;
            }
            if ($this->lexer->token['type'] === EmailLexer::S_CLOSEQBRACKET) {
                break;
            }

            $addressLiteral .= $this->lexer->token['value'];

        } while ($this->lexer->moveNext());

        $addressLiteral = str_replace('[', '', $addressLiteral);
        $addressLiteral = $this->checkIPV4Tag($addressLiteral);

        if (false === $addressLiteral) {
            return $addressLiteral;
        }

        if (!$IPv6TAG) {
            $this->warnings[DomainLiteral::CODE] = new DomainLiteral();
            return $addressLiteral;
        }

        $this->warnings[AddressLiteral::CODE] = new AddressLiteral();

        $this->checkIPV6Tag($addressLiteral);

        return $addressLiteral;
    }

    /**
     * Looks for a dotted-quad IPv4 address at the end of the literal.
     *
     * @param string $addressLiteral
     *
     * @return string|false false (plus an AddressLiteral warning) when the
     *                      IPv4 address starts at offset 0; otherwise the
     *                      literal, with a trailing IPv4 address replaced by
     *                      "0:0" so the IPv6 checks can run on it.
     */
    protected function checkIPV4Tag($addressLiteral)
    {
        $matchesIP = [];

        // Extract IPv4 part from the end of the address-literal (if there is one)
        if (preg_match(
            '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
            $addressLiteral,
            $matchesIP
        ) > 0
        ) {
            $index = strrpos($addressLiteral, $matchesIP[0]);
            if ($index === 0) {
                $this->warnings[AddressLiteral::CODE] = new AddressLiteral();
                return false;
            }
            // Convert IPv4 part to IPv6 format for further testing
            $addressLiteral = substr($addressLiteral, 0, (int) $index) . '0:0';
        }

        return $addressLiteral;
    }

    /**
     * Throws when the current token may not appear at this point of a domain.
     *
     * @param array $prev The token that preceded the current one; only its
     *                    'type' entry is read.
     *
     * @throws ExpectingATEXT on '"', ';', '<', '>' tokens, on S_OPENQBRACKET
     *                        not directly after "@", and on a backslash
     *                        followed by a GENERIC token
     * @throws CommaInDomain
     * @throws ConsecutiveAt
     * @throws DomainHyphened when a hyphen is directly followed by a dot
     */
    protected function checkDomainPartExceptions(array $prev)
    {
        $invalidDomainTokens = [
            EmailLexer::S_DQUOTE => true,
            EmailLexer::S_SEMICOLON => true,
            EmailLexer::S_GREATERTHAN => true,
            EmailLexer::S_LOWERTHAN => true,
        ];

        if (isset($invalidDomainTokens[$this->lexer->token['type']])) {
            throw new ExpectingATEXT();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_COMMA) {
            throw new CommaInDomain();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_AT) {
            throw new ConsecutiveAt();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_OPENQBRACKET && $prev['type'] !== EmailLexer::S_AT) {
            throw new ExpectingATEXT();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN && $this->lexer->isNextToken(EmailLexer::S_DOT)) {
            throw new DomainHyphened();
        }

        // NOTE(review): doParseDomainPart() already rejects backslashes through
        // checkNotAllowedChars() before calling this method, so this branch only
        // triggers for other callers.
        if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH
            && $this->lexer->isNextToken(EmailLexer::GENERIC)) {
            throw new ExpectingATEXT();
        }
    }

    /**
     * Tells whether the current token opens a bracketed domain literal.
     *
     * @return bool false when the current token is not S_OPENBRACKET, true
     *              when it is and the lexer can find a closing bracket.
     *
     * @throws ExpectingDomainLiteralClose when the lexer's find() raises a
     *                                     RuntimeException
     */
    protected function hasBrackets()
    {
        if ($this->lexer->token['type'] !== EmailLexer::S_OPENBRACKET) {
            return false;
        }

        try {
            $this->lexer->find(EmailLexer::S_CLOSEBRACKET);
        } catch (\RuntimeException $e) {
            throw new ExpectingDomainLiteralClose();
        }

        return true;
    }

    /**
     * Records a LabelTooLong warning when a dot ends a GENERIC token whose
     * value is longer than 63 bytes.
     *
     * @param array $prev The token preceding the current one; its 'type' and
     *                    'value' entries are read.
     */
    protected function checkLabelLength(array $prev)
    {
        if ($this->lexer->token['type'] === EmailLexer::S_DOT &&
            $prev['type'] === EmailLexer::GENERIC &&
            strlen($prev['value']) > 63
        ) {
            $this->warnings[LabelTooLong::CODE] = new LabelTooLong();
        }
    }

    /**
     * Skips over a comment at the start of the domain.
     *
     * NOTE(review): the loop relies on isUnclosedComment() guaranteeing that a
     * closing parenthesis exists further on — confirm in Parser.
     *
     * @throws ExpectingATEXT when a dot directly follows the comment
     */
    protected function parseDomainComments()
    {
        $this->isUnclosedComment();
        while (!$this->lexer->isNextToken(EmailLexer::S_CLOSEPARENTHESIS)) {
            $this->warnEscaping();
            $this->lexer->moveNext();
        }

        // Step onto the closing parenthesis itself.
        $this->lexer->moveNext();
        if ($this->lexer->isNextToken(EmailLexer::S_DOT)) {
            throw new ExpectingATEXT();
        }
    }

    /**
     * Adds a TLD warning when a DomainLiteral warning has been recorded.
     */
    protected function addTLDWarnings()
    {
        // isset() avoids an "undefined array key" warning when no DomainLiteral
        // warning exists; a stored warning object always counts as set.
        if (isset($this->warnings[DomainLiteral::CODE])) {
            $this->warnings[TLD::CODE] = new TLD();
        }
    }
}
415