<?php

namespace Egulias\EmailValidator\Parser;

use Egulias\EmailValidator\EmailLexer;
use Egulias\EmailValidator\Warning\TLD;
use Egulias\EmailValidator\Result\Result;
use Egulias\EmailValidator\Result\ValidEmail;
use Egulias\EmailValidator\Result\InvalidEmail;
use Egulias\EmailValidator\Result\Reason\DotAtEnd;
use Egulias\EmailValidator\Result\Reason\DotAtStart;
use Egulias\EmailValidator\Warning\DeprecatedComment;
use Egulias\EmailValidator\Result\Reason\CRLFAtTheEnd;
use Egulias\EmailValidator\Result\Reason\LabelTooLong;
use Egulias\EmailValidator\Result\Reason\NoDomainPart;
use Egulias\EmailValidator\Result\Reason\ConsecutiveAt;
use Egulias\EmailValidator\Result\Reason\DomainTooLong;
use Egulias\EmailValidator\Result\Reason\CharNotAllowed;
use Egulias\EmailValidator\Result\Reason\DomainHyphened;
use Egulias\EmailValidator\Result\Reason\ExpectingATEXT;
use Egulias\EmailValidator\Parser\CommentStrategy\DomainComment;
use Egulias\EmailValidator\Result\Reason\ExpectingDomainLiteralClose;
use Egulias\EmailValidator\Parser\DomainLiteral as DomainLiteralParser;

/**
 * Parser for the domain part of an e-mail address.
 *
 * Walks the lexer token by token, rejecting tokens that are not allowed in a
 * domain, enforcing the per-label and whole-domain length limits, and
 * delegating comments and domain literals to their dedicated parsers.
 * Warnings raised by those parsers are merged into this parser's warnings.
 */
class DomainPart extends PartParser
{
    /** Maximum accepted length of the whole domain, in bytes. */
    public const DOMAIN_MAX_LENGTH = 253;

    /** Maximum accepted length of a single dot-separated label, in bytes. */
    public const LABEL_MAX_LENGTH = 63;

    /**
     * Domain text captured by the last parse() call.
     *
     * @var string
     */
    protected $domainPart = '';

    /**
     * Text of the label currently being read (never contains the separating dot).
     *
     * @var string
     */
    protected $label = '';

    /**
     * Parses the domain starting at the token that follows the lexer's
     * current position.
     *
     * @return Result a ValidEmail on success, otherwise the InvalidEmail
     *                produced by the first failing check
     */
    public function parse() : Result
    {
        // A previous run that bailed out early may have left a partial label behind.
        $this->label = '';

        $this->lexer->clearRecorded();
        $this->lexer->startRecording();

        $this->lexer->moveNext();

        $domainChecks = $this->performDomainStartChecks();
        if ($domainChecks->isInvalid()) {
            return $domainChecks;
        }

        if ($this->lexer->token['type'] === EmailLexer::S_AT) {
            return new InvalidEmail(new ConsecutiveAt(), $this->lexer->token['value']);
        }

        $result = $this->doParseDomainPart();
        if ($result->isInvalid()) {
            return $result;
        }

        $end = $this->checkEndOfDomain();
        if ($end->isInvalid()) {
            return $end;
        }

        $this->lexer->stopRecording();
        // Overrides the value stored by doParseDomainPart() with what the lexer
        // accumulated while recording.
        $this->domainPart = $this->lexer->getAccumulatedValues();

        // strlen() on purpose: the limit is applied to the byte length.
        $length = strlen($this->domainPart);
        if ($length > self::DOMAIN_MAX_LENGTH) {
            return new InvalidEmail(new DomainTooLong(), $this->lexer->token['value']);
        }

        return new ValidEmail();
    }

    /**
     * Rejects domains whose last token is a dot or a hyphen, or that are
     * followed by a space token.
     */
    private function checkEndOfDomain() : Result
    {
        $prev = $this->lexer->getPrevious();
        if ($prev['type'] === EmailLexer::S_DOT) {
            return new InvalidEmail(new DotAtEnd(), $this->lexer->token['value']);
        }
        if ($prev['type'] === EmailLexer::S_HYPHEN) {
            return new InvalidEmail(new DomainHyphened('Hypen found at the end of the domain'), $prev['value']);
        }

        if ($this->lexer->token['type'] === EmailLexer::S_SP) {
            return new InvalidEmail(new CRLFAtTheEnd(), $prev['value']);
        }

        return new ValidEmail();
    }

    /**
     * Runs the checks that apply to the very first token of the domain and
     * records a DeprecatedComment warning when the domain opens with "(".
     */
    private function performDomainStartChecks() : Result
    {
        $invalidTokens = $this->checkInvalidTokensAfterAT();
        if ($invalidTokens->isInvalid()) {
            return $invalidTokens;
        }

        $missingDomain = $this->checkEmptyDomain();
        if ($missingDomain->isInvalid()) {
            return $missingDomain;
        }

        if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
            $this->warnings[DeprecatedComment::CODE] = new DeprecatedComment();
        }

        return new ValidEmail();
    }

    /**
     * Fails with NoDomainPart when the current token is empty, or is a space
     * that is not followed by a GENERIC token.
     */
    private function checkEmptyDomain() : Result
    {
        $thereIsNoDomain = $this->lexer->token['type'] === EmailLexer::S_EMPTY ||
            ($this->lexer->token['type'] === EmailLexer::S_SP &&
            !$this->lexer->isNextToken(EmailLexer::GENERIC));

        if ($thereIsNoDomain) {
            return new InvalidEmail(new NoDomainPart(), $this->lexer->token['value']);
        }

        return new ValidEmail();
    }

    /**
     * Fails when the domain starts with a dot or a hyphen.
     */
    private function checkInvalidTokensAfterAT() : Result
    {
        if ($this->lexer->token['type'] === EmailLexer::S_DOT) {
            return new InvalidEmail(new DotAtStart(), $this->lexer->token['value']);
        }
        if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN) {
            return new InvalidEmail(new DomainHyphened('After AT'), $this->lexer->token['value']);
        }

        return new ValidEmail();
    }

    /**
     * Delegates to the Comment parser (with the DomainComment strategy) and
     * merges the warnings it collected into this parser's warnings.
     */
    protected function parseComments(): Result
    {
        $commentParser = new Comment($this->lexer, new DomainComment());
        $result = $commentParser->parse();
        $this->warnings = array_merge($this->warnings, $commentParser->getWarnings());

        return $result;
    }

    /**
     * Consumes tokens until the lexer reports a token whose type is null,
     * validating each one, and stores the concatenated token values in
     * $this->domainPart.
     *
     * A TLD warning is added when no dot followed by a GENERIC token was seen.
     * When an opening bracket is met the rest is handed to the domain literal
     * parser and its result is returned directly.
     */
    protected function doParseDomainPart() : Result
    {
        $tldMissing = true;
        $hasComments = false;
        $domain = '';
        do {
            $prev = $this->lexer->getPrevious();

            $notAllowedChars = $this->checkNotAllowedChars($this->lexer->token);
            if ($notAllowedChars->isInvalid()) {
                return $notAllowedChars;
            }

            if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS ||
                $this->lexer->token['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
                $hasComments = true;
                $commentsResult = $this->parseComments();

                // Invalid comment parsing
                if ($commentsResult->isInvalid()) {
                    return $commentsResult;
                }
            }

            $dotsResult = $this->checkConsecutiveDots();
            if ($dotsResult->isInvalid()) {
                return $dotsResult;
            }

            if ($this->lexer->token['type'] === EmailLexer::S_OPENBRACKET) {
                $literalResult = $this->parseDomainLiteral();

                $this->addTLDWarnings($tldMissing);
                return $literalResult;
            }

            $labelCheck = $this->checkLabelLength();
            if ($labelCheck->isInvalid()) {
                return $labelCheck;
            }

            $FwsResult = $this->parseFWS();
            if ($FwsResult->isInvalid()) {
                return $FwsResult;
            }

            $domain .= $this->lexer->token['value'];

            if ($this->lexer->token['type'] === EmailLexer::S_DOT && $this->lexer->isNextToken(EmailLexer::GENERIC)) {
                $tldMissing = false;
            }

            $exceptionsResult = $this->checkDomainPartExceptions($prev, $hasComments);
            if ($exceptionsResult->isInvalid()) {
                return $exceptionsResult;
            }
            $this->lexer->moveNext();
        } while (null !== $this->lexer->token['type']);

        // The last label is not followed by a dot, so it is checked explicitly.
        $labelCheck = $this->checkLabelLength(true);
        if ($labelCheck->isInvalid()) {
            return $labelCheck;
        }
        $this->addTLDWarnings($tldMissing);

        $this->domainPart = $domain;

        return new ValidEmail();
    }

    /**
     * Fails with CharNotAllowed when the token is a backslash or a slash.
     *
     * @param array $token token as exposed by the lexer; its 'type' and 'value' keys are read
     */
    private function checkNotAllowedChars(array $token) : Result
    {
        $notAllowed = [EmailLexer::S_BACKSLASH => true, EmailLexer::S_SLASH => true];
        if (isset($notAllowed[$token['type']])) {
            return new InvalidEmail(new CharNotAllowed(), $token['value']);
        }

        return new ValidEmail();
    }

    /**
     * Parses a bracketed domain literal.
     *
     * Fails with ExpectingDomainLiteralClose when the lexer's search for the
     * closing bracket throws; otherwise delegates to the DomainLiteral parser
     * and merges its warnings into this parser's warnings.
     *
     * @return Result
     */
    protected function parseDomainLiteral() : Result
    {
        try {
            $this->lexer->find(EmailLexer::S_CLOSEBRACKET);
        } catch (\RuntimeException $e) {
            return new InvalidEmail(new ExpectingDomainLiteralClose(), $this->lexer->token['value']);
        }

        $domainLiteralParser = new DomainLiteralParser($this->lexer);
        $result = $domainLiteralParser->parse();
        $this->warnings = array_merge($this->warnings, $domainLiteralParser->getWarnings());

        return $result;
    }

    /**
     * Rejects token sequences that are never valid inside a domain, then
     * falls through to the generic token whitelist.
     *
     * @param array $prev        token that preceded the current one; its 'type' key is read
     * @param bool  $hasComments whether a comment has been seen so far in this domain
     */
    protected function checkDomainPartExceptions(array $prev, bool $hasComments) : Result
    {
        if ($this->lexer->token['type'] === EmailLexer::S_OPENBRACKET && $prev['type'] !== EmailLexer::S_AT) {
            return new InvalidEmail(new ExpectingATEXT('OPENBRACKET not after AT'), $this->lexer->token['value']);
        }

        if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN && $this->lexer->isNextToken(EmailLexer::S_DOT)) {
            return new InvalidEmail(new DomainHyphened('Hypen found near DOT'), $this->lexer->token['value']);
        }

        if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH
            && $this->lexer->isNextToken(EmailLexer::GENERIC)) {
            return new InvalidEmail(new ExpectingATEXT('Escaping following "ATOM"'), $this->lexer->token['value']);
        }

        return $this->validateTokens($hasComments);
    }

    /**
     * Accepts only GENERIC, hyphen and dot tokens; parentheses are accepted
     * as well once a comment has been seen.
     *
     * @param bool $hasComments whether a comment has been seen so far in this domain
     */
    protected function validateTokens(bool $hasComments) : Result
    {
        $validDomainTokens = [
            EmailLexer::GENERIC => true,
            EmailLexer::S_HYPHEN => true,
            EmailLexer::S_DOT => true,
        ];

        if ($hasComments) {
            $validDomainTokens[EmailLexer::S_OPENPARENTHESIS] = true;
            $validDomainTokens[EmailLexer::S_CLOSEPARENTHESIS] = true;
        }

        if (!isset($validDomainTokens[$this->lexer->token['type']])) {
            return new InvalidEmail(new ExpectingATEXT('Invalid token in domain: ' . $this->lexer->token['value']), $this->lexer->token['value']);
        }

        return new ValidEmail();
    }

    /**
     * Accumulates the current label and validates its length once the label
     * is complete, i.e. on a dot token or at the end of the domain.
     *
     * @param bool $isEndOfDomain true when called after the last token was consumed
     */
    private function checkLabelLength(bool $isEndOfDomain = false) : Result
    {
        if ($this->lexer->token['type'] === EmailLexer::S_DOT || $isEndOfDomain) {
            if ($this->isLabelTooLong($this->label)) {
                return new InvalidEmail(new LabelTooLong(), $this->lexer->token['value']);
            }

            // The separating dot belongs to neither label. Appending it to the
            // next one would make every label after the first look one byte
            // longer than it is and reject valid 63-byte labels.
            $this->label = '';

            return new ValidEmail();
        }

        $this->label .= $this->lexer->token['value'];

        return new ValidEmail();
    }

    /**
     * Tells whether a label exceeds the maximum label length.
     *
     * Labels containing non-ASCII bytes are checked through idn_to_ascii(),
     * which flags the label when its converted form is too long; pure ASCII
     * labels are measured directly in bytes.
     */
    private function isLabelTooLong(string $label) : bool
    {
        if (preg_match('/[^\x00-\x7F]/', $label)) {
            // idn_to_ascii() expects UTF-8, so the label is passed unchanged.
            $idnaInfo = [];
            idn_to_ascii($label, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46, $idnaInfo);

            return (bool) (($idnaInfo['errors'] ?? 0) & IDNA_ERROR_LABEL_TOO_LONG);
        }

        return strlen($label) > self::LABEL_MAX_LENGTH;
    }

    /**
     * Records a TLD warning when the domain has no top-level domain.
     */
    private function addTLDWarnings(bool $isTLDMissing) : void
    {
        if ($isTLDMissing) {
            $this->warnings[TLD::CODE] = new TLD();
        }
    }

    /**
     * Returns the domain text captured by the last parse() call.
     */
    public function domainPart() : string
    {
        return $this->domainPart;
    }
}