1<?php
2
3namespace Egulias\EmailValidator\Parser;
4
5use Egulias\EmailValidator\EmailLexer;
6use Egulias\EmailValidator\Warning\TLD;
7use Egulias\EmailValidator\Result\Result;
8use Egulias\EmailValidator\Result\ValidEmail;
9use Egulias\EmailValidator\Result\InvalidEmail;
10use Egulias\EmailValidator\Result\Reason\DotAtEnd;
11use Egulias\EmailValidator\Result\Reason\DotAtStart;
12use Egulias\EmailValidator\Warning\DeprecatedComment;
13use Egulias\EmailValidator\Result\Reason\CRLFAtTheEnd;
14use Egulias\EmailValidator\Result\Reason\LabelTooLong;
15use Egulias\EmailValidator\Result\Reason\NoDomainPart;
16use Egulias\EmailValidator\Result\Reason\ConsecutiveAt;
17use Egulias\EmailValidator\Result\Reason\DomainTooLong;
18use Egulias\EmailValidator\Result\Reason\CharNotAllowed;
19use Egulias\EmailValidator\Result\Reason\DomainHyphened;
20use Egulias\EmailValidator\Result\Reason\ExpectingATEXT;
21use Egulias\EmailValidator\Parser\CommentStrategy\DomainComment;
22use Egulias\EmailValidator\Result\Reason\ExpectingDomainLiteralClose;
23use Egulias\EmailValidator\Parser\DomainLiteral as DomainLiteralParser;
24
/**
 * Parses and validates the domain part of an e-mail address, i.e. the tokens
 * the lexer yields after the "@" sign.
 *
 * The parser walks the lexer one token at a time, rejecting the address with
 * an InvalidEmail result on the first failed check and collecting warnings
 * (deprecated comments, missing TLD, domain literal warnings) along the way.
 */
class DomainPart extends PartParser
{
    /** Maximum accepted length, in bytes, of the recorded domain part. */
    public const DOMAIN_MAX_LENGTH = 253;

    /** Maximum accepted length, in bytes, of a single dot-separated label. */
    public const LABEL_MAX_LENGTH = 63;

    /**
     * Domain text recorded by the lexer during the last successful parse().
     *
     * @var string
     */
    protected $domainPart = '';

    /**
     * Buffer holding the label currently being read (text between two dots).
     *
     * @var string
     */
    protected $label = '';

    /**
     * Parses the domain part starting at the token that follows the "@".
     *
     * Runs, in order: the start-of-domain checks, the consecutive "@" check,
     * the token-by-token domain parsing, the end-of-domain checks and finally
     * the overall length check on the text recorded by the lexer.
     *
     * @return Result InvalidEmail for the first check that fails, ValidEmail otherwise.
     */
    public function parse() : Result
    {
        $this->lexer->clearRecorded();
        $this->lexer->startRecording();

        $this->lexer->moveNext();

        $domainChecks = $this->performDomainStartChecks();
        if ($domainChecks->isInvalid()) {
            return $domainChecks;
        }

        if ($this->lexer->token['type'] === EmailLexer::S_AT) {
            return new InvalidEmail(new ConsecutiveAt(), $this->lexer->token['value']);
        }

        $result = $this->doParseDomainPart();
        if ($result->isInvalid()) {
            return $result;
        }

        $end = $this->checkEndOfDomain();
        if ($end->isInvalid()) {
            return $end;
        }

        $this->lexer->stopRecording();
        // The recorded text replaces whatever doParseDomainPart() stored.
        $this->domainPart = $this->lexer->getAccumulatedValues();

        if (strlen($this->domainPart) > self::DOMAIN_MAX_LENGTH) {
            return new InvalidEmail(new DomainTooLong(), $this->lexer->token['value']);
        }

        return new ValidEmail();
    }

    /**
     * Validates how the domain ends: it must not finish with a dot or a hyphen,
     * and the current token must not be whitespace.
     *
     * @return Result
     */
    private function checkEndOfDomain() : Result
    {
        $prev = $this->lexer->getPrevious();
        if ($prev['type'] === EmailLexer::S_DOT) {
            return new InvalidEmail(new DotAtEnd(), $this->lexer->token['value']);
        }
        if ($prev['type'] === EmailLexer::S_HYPHEN) {
            return new InvalidEmail(new DomainHyphened('Hypen found at the end of the domain'), $prev['value']);
        }

        if ($this->lexer->token['type'] === EmailLexer::S_SP) {
            return new InvalidEmail(new CRLFAtTheEnd(), $prev['value']);
        }

        return new ValidEmail();
    }

    /**
     * Runs the checks that apply to the very first token of the domain and
     * records a DeprecatedComment warning when the domain opens with "(".
     *
     * @return Result
     */
    private function performDomainStartChecks() : Result
    {
        $invalidTokens = $this->checkInvalidTokensAfterAT();
        if ($invalidTokens->isInvalid()) {
            return $invalidTokens;
        }

        $missingDomain = $this->checkEmptyDomain();
        if ($missingDomain->isInvalid()) {
            return $missingDomain;
        }

        if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
            $this->warnings[DeprecatedComment::CODE] = new DeprecatedComment();
        }

        return new ValidEmail();
    }

    /**
     * Detects a missing domain: the current token is S_EMPTY, or it is a
     * space that is not followed by a GENERIC token.
     *
     * @return Result
     */
    private function checkEmptyDomain() : Result
    {
        $tokenType = $this->lexer->token['type'];

        $thereIsNoDomain = $tokenType === EmailLexer::S_EMPTY
            || ($tokenType === EmailLexer::S_SP && !$this->lexer->isNextToken(EmailLexer::GENERIC));

        if ($thereIsNoDomain) {
            return new InvalidEmail(new NoDomainPart(), $this->lexer->token['value']);
        }

        return new ValidEmail();
    }

    /**
     * Rejects a domain whose first token is a dot or a hyphen.
     *
     * @return Result
     */
    private function checkInvalidTokensAfterAT() : Result
    {
        if ($this->lexer->token['type'] === EmailLexer::S_DOT) {
            return new InvalidEmail(new DotAtStart(), $this->lexer->token['value']);
        }
        if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN) {
            return new InvalidEmail(new DomainHyphened('After AT'), $this->lexer->token['value']);
        }

        return new ValidEmail();
    }

    /**
     * Delegates to the Comment parser using the domain comment strategy and
     * merges the warnings it produced into this parser's warnings.
     *
     * @return Result
     */
    protected function parseComments(): Result
    {
        $commentParser = new Comment($this->lexer, new DomainComment());
        $result = $commentParser->parse();
        $this->warnings = array_merge($this->warnings, $commentParser->getWarnings());

        return $result;
    }

    /**
     * Consumes the domain tokens until the lexer reports a token whose type is null.
     *
     * Each token goes through, in this order: the forbidden character check,
     * comment parsing, the consecutive dots check, domain literal detection
     * (which ends the parsing), the label length check, folding white space
     * parsing and the remaining domain token rules. The statement order
     * matters because several of these steps advance the lexer.
     *
     * @return Result
     */
    protected function doParseDomainPart() : Result
    {
        $tldMissing = true;
        $hasComments = false;
        $domain = '';
        do {
            $prev = $this->lexer->getPrevious();

            $notAllowedChars = $this->checkNotAllowedChars($this->lexer->token);
            if ($notAllowedChars->isInvalid()) {
                return $notAllowedChars;
            }

            if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS ||
                $this->lexer->token['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
                $hasComments = true;
                $commentsResult = $this->parseComments();

                // Invalid comment parsing
                if ($commentsResult->isInvalid()) {
                    return $commentsResult;
                }
            }

            $dotsResult = $this->checkConsecutiveDots();
            if ($dotsResult->isInvalid()) {
                return $dotsResult;
            }

            if ($this->lexer->token['type'] === EmailLexer::S_OPENBRACKET) {
                // A domain literal is handled by its own parser and ends the domain.
                $literalResult = $this->parseDomainLiteral();

                $this->addTLDWarnings($tldMissing);
                return $literalResult;
            }

            $labelCheck = $this->checkLabelLength();
            if ($labelCheck->isInvalid()) {
                return $labelCheck;
            }

            $fwsResult = $this->parseFWS();
            if ($fwsResult->isInvalid()) {
                return $fwsResult;
            }

            $domain .= $this->lexer->token['value'];

            // A dot followed by a GENERIC token means there is at least one more label.
            if ($this->lexer->token['type'] === EmailLexer::S_DOT && $this->lexer->isNextToken(EmailLexer::GENERIC)) {
                $tldMissing = false;
            }

            $exceptionsResult = $this->checkDomainPartExceptions($prev, $hasComments);
            if ($exceptionsResult->isInvalid()) {
                return $exceptionsResult;
            }
            $this->lexer->moveNext();

        } while (null !== $this->lexer->token['type']);

        // The last label is not followed by a dot, so it is checked here.
        $labelCheck = $this->checkLabelLength(true);
        if ($labelCheck->isInvalid()) {
            return $labelCheck;
        }
        $this->addTLDWarnings($tldMissing);

        $this->domainPart = $domain;
        return new ValidEmail();
    }

    /**
     * Rejects tokens that are never allowed in a domain: backslash and slash.
     *
     * @param array $token Lexer token; its 'type' and 'value' entries are read.
     *
     * @return Result
     */
    private function checkNotAllowedChars(array $token) : Result
    {
        $notAllowed = [EmailLexer::S_BACKSLASH => true, EmailLexer::S_SLASH => true];
        if (isset($notAllowed[$token['type']])) {
            return new InvalidEmail(new CharNotAllowed(), $token['value']);
        }

        return new ValidEmail();
    }

    /**
     * Parses a domain literal after making sure a closing bracket exists, and
     * merges the warnings raised by the domain literal parser.
     *
     * @return Result
     */
    protected function parseDomainLiteral() : Result
    {
        try {
            $this->lexer->find(EmailLexer::S_CLOSEBRACKET);
        } catch (\RuntimeException $e) {
            return new InvalidEmail(new ExpectingDomainLiteralClose(), $this->lexer->token['value']);
        }

        $domainLiteralParser = new DomainLiteralParser($this->lexer);
        $result = $domainLiteralParser->parse();
        $this->warnings = array_merge($this->warnings, $domainLiteralParser->getWarnings());

        return $result;
    }

    /**
     * Applies the rules that depend on the neighbours of the current token,
     * then validates the token type itself.
     *
     * @param array $prev        Token that preceded the current one; its 'type' entry is read.
     * @param bool  $hasComments Whether a comment has been seen in the domain so far.
     *
     * @return Result
     */
    protected function checkDomainPartExceptions(array $prev, bool $hasComments) : Result
    {
        if ($this->lexer->token['type'] === EmailLexer::S_OPENBRACKET && $prev['type'] !== EmailLexer::S_AT) {
            return new InvalidEmail(new ExpectingATEXT('OPENBRACKET not after AT'), $this->lexer->token['value']);
        }

        if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN && $this->lexer->isNextToken(EmailLexer::S_DOT)) {
            return new InvalidEmail(new DomainHyphened('Hypen found near DOT'), $this->lexer->token['value']);
        }

        if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH
            && $this->lexer->isNextToken(EmailLexer::GENERIC)) {
            return new InvalidEmail(new ExpectingATEXT('Escaping following "ATOM"'), $this->lexer->token['value']);
        }

        return $this->validateTokens($hasComments);
    }

    /**
     * Checks that the current token type is one a domain may contain:
     * GENERIC, hyphen and dot, plus parentheses once a comment has been seen.
     *
     * @param bool $hasComments Whether parentheses are acceptable at this point.
     *
     * @return Result
     */
    protected function validateTokens(bool $hasComments) : Result
    {
        $validDomainTokens = [
            EmailLexer::GENERIC => true,
            EmailLexer::S_HYPHEN => true,
            EmailLexer::S_DOT => true,
        ];

        if ($hasComments) {
            $validDomainTokens[EmailLexer::S_OPENPARENTHESIS] = true;
            $validDomainTokens[EmailLexer::S_CLOSEPARENTHESIS] = true;
        }

        if (!isset($validDomainTokens[$this->lexer->token['type']])) {
            return new InvalidEmail(new ExpectingATEXT('Invalid token in domain: ' . $this->lexer->token['value']), $this->lexer->token['value']);
        }

        return new ValidEmail();
    }

    /**
     * Accumulates the current token into the label buffer and, when a label
     * ends (current token is a dot, or the end of the domain was reached),
     * verifies the finished label against the length limit.
     *
     * @param bool $isEndOfDomain True when called after the last token was consumed.
     *
     * @return Result
     */
    private function checkLabelLength(bool $isEndOfDomain = false) : Result
    {
        if ($this->lexer->token['type'] === EmailLexer::S_DOT || $isEndOfDomain) {
            if ($this->isLabelTooLong($this->label)) {
                return new InvalidEmail(new LabelTooLong(), $this->lexer->token['value']);
            }
            $this->label = '';

            // The separator is not part of any label: appending it would make
            // every label after the first count one byte more than it has.
            return new ValidEmail();
        }

        $this->label .= $this->lexer->token['value'];

        return new ValidEmail();
    }

    /**
     * Tells whether a label exceeds the maximum label length.
     *
     * Labels containing non-ASCII bytes are measured through idn_to_ascii(),
     * which reports IDNA_ERROR_LABEL_TOO_LONG; plain ASCII labels are measured
     * in bytes against LABEL_MAX_LENGTH.
     *
     * @param string $label Label text as accumulated from the lexer tokens.
     *
     * @return bool
     */
    private function isLabelTooLong(string $label) : bool
    {
        if (preg_match('/[^\x00-\x7F]/', $label)) {
            // idn_to_ascii() expects UTF-8, so the label is passed untouched;
            // converting it to ISO-8859-1 first would corrupt the input.
            $idnaInfo = [];
            idn_to_ascii($label, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46, $idnaInfo);

            return (bool) (($idnaInfo['errors'] ?? 0) & IDNA_ERROR_LABEL_TOO_LONG);
        }

        return strlen($label) > self::LABEL_MAX_LENGTH;
    }

    /**
     * Records a TLD warning when no top level domain was detected.
     *
     * @param bool $isTLDMissing
     */
    private function addTLDWarnings(bool $isTLDMissing) : void
    {
        if ($isTLDMissing) {
            $this->warnings[TLD::CODE] = new TLD();
        }
    }

    /**
     * Returns the domain text stored by the last parse.
     *
     * @return string
     */
    public function domainPart() : string
    {
        return $this->domainPart;
    }
}