<?php

namespace Egulias\EmailValidator\Parser;

use Egulias\EmailValidator\EmailLexer;
use Egulias\EmailValidator\Exception\CharNotAllowed;
use Egulias\EmailValidator\Exception\CommaInDomain;
use Egulias\EmailValidator\Exception\ConsecutiveAt;
use Egulias\EmailValidator\Exception\CRLFAtTheEnd;
use Egulias\EmailValidator\Exception\CRNoLF;
use Egulias\EmailValidator\Exception\DomainHyphened;
use Egulias\EmailValidator\Exception\DotAtEnd;
use Egulias\EmailValidator\Exception\DotAtStart;
use Egulias\EmailValidator\Exception\ExpectingATEXT;
use Egulias\EmailValidator\Exception\ExpectingDomainLiteralClose;
use Egulias\EmailValidator\Exception\ExpectingDTEXT;
use Egulias\EmailValidator\Exception\NoDomainPart;
use Egulias\EmailValidator\Exception\UnopenedComment;
use Egulias\EmailValidator\Warning\AddressLiteral;
use Egulias\EmailValidator\Warning\CFWSWithFWS;
use Egulias\EmailValidator\Warning\DeprecatedComment;
use Egulias\EmailValidator\Warning\DomainLiteral;
use Egulias\EmailValidator\Warning\DomainTooLong;
use Egulias\EmailValidator\Warning\IPV6BadChar;
use Egulias\EmailValidator\Warning\IPV6ColonEnd;
use Egulias\EmailValidator\Warning\IPV6ColonStart;
use Egulias\EmailValidator\Warning\IPV6Deprecated;
use Egulias\EmailValidator\Warning\IPV6DoubleColon;
use Egulias\EmailValidator\Warning\IPV6GroupCount;
use Egulias\EmailValidator\Warning\IPV6MaxGroups;
use Egulias\EmailValidator\Warning\LabelTooLong;
use Egulias\EmailValidator\Warning\ObsoleteDTEXT;
use Egulias\EmailValidator\Warning\TLD;

/**
 * Parses the domain part of an e-mail address from the lexer's token stream.
 *
 * Hard failures are reported by throwing the exception classes imported above;
 * softer findings are stored in $this->warnings, keyed by the warning's CODE.
 */
class DomainPart extends Parser
{
    /** Byte length above which parse() records a DomainTooLong warning. */
    const DOMAIN_MAX_LENGTH = 254;

    /** Byte length above which an ASCII label is reported as LabelTooLong. */
    const LABEL_MAX_LENGTH = 63;

    /**
     * The domain string assembled by the last call to parse().
     *
     * @var string
     */
    protected $domainPart = '';

    /**
     * Parses the domain from the lexer and stores the result for getDomainPart().
     *
     * The lexer is advanced by one token first, then the start checks and the
     * main parsing loop run. Afterwards the token returned by getPrevious() is
     * inspected for a trailing dot, hyphen or CR.
     *
     * @param mixed $domainPart Not read by this method; the lexer state is used instead.
     *
     * @throws DotAtEnd       When the previous token after parsing is a dot.
     * @throws DomainHyphened When the previous token after parsing is a hyphen.
     * @throws CRLFAtTheEnd   When the previous token after parsing is a CR.
     */
    public function parse($domainPart)
    {
        $this->lexer->moveNext();

        $this->performDomainStartChecks();

        $domain = $this->doParseDomainPart();

        $prev = $this->lexer->getPrevious();
        $length = strlen($domain);

        if ($prev['type'] === EmailLexer::S_DOT) {
            throw new DotAtEnd();
        }
        if ($prev['type'] === EmailLexer::S_HYPHEN) {
            throw new DomainHyphened();
        }
        // An over-long domain is only a warning, so parsing continues.
        if ($length > self::DOMAIN_MAX_LENGTH) {
            $this->warnings[DomainTooLong::CODE] = new DomainTooLong();
        }
        if ($prev['type'] === EmailLexer::S_CR) {
            throw new CRLFAtTheEnd();
        }
        $this->domainPart = $domain;
    }

    /**
     * Runs the checks that apply to the first token of the domain.
     *
     * If the current token is an opening parenthesis, a DeprecatedComment
     * warning is recorded and the comment is consumed via parseDomainComments().
     */
    private function performDomainStartChecks()
    {
        $this->checkInvalidTokensAfterAT();
        $this->checkEmptyDomain();

        if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
            $this->warnings[DeprecatedComment::CODE] = new DeprecatedComment();
            $this->parseDomainComments();
        }
    }

    /**
     * Rejects an address that has no domain.
     *
     * The domain counts as missing when the current token is S_EMPTY, or when
     * it is S_SP and the next token is not GENERIC.
     *
     * @throws NoDomainPart
     */
    private function checkEmptyDomain()
    {
        $thereIsNoDomain = $this->lexer->token['type'] === EmailLexer::S_EMPTY ||
            ($this->lexer->token['type'] === EmailLexer::S_SP &&
            !$this->lexer->isNextToken(EmailLexer::GENERIC));

        if ($thereIsNoDomain) {
            throw new NoDomainPart();
        }
    }

    /**
     * Rejects a domain whose first token is a dot or a hyphen.
     *
     * @throws DotAtStart     When the current token is a dot.
     * @throws DomainHyphened When the current token is a hyphen.
     */
    private function checkInvalidTokensAfterAT()
    {
        if ($this->lexer->token['type'] === EmailLexer::S_DOT) {
            throw new DotAtStart();
        }
        if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN) {
            throw new DomainHyphened();
        }
    }

    /**
     * Returns the domain stored by the last successful parse().
     *
     * @return string
     */
    public function getDomainPart()
    {
        return $this->domainPart;
    }

    /**
     * Records IPv6-related warnings for an address literal; never throws.
     *
     * The first five characters of $addressLiteral are dropped before the
     * remainder is split on ':' (presumably they are the "IPv6:" tag; confirm
     * against the caller).
     *
     * @param string $addressLiteral
     * @param int    $maxGroups      Number of groups expected when no '::' is present.
     */
    public function checkIPV6Tag($addressLiteral, $maxGroups = 8)
    {
        $prev = $this->lexer->getPrevious();
        if ($prev['type'] === EmailLexer::S_COLON) {
            $this->warnings[IPV6ColonEnd::CODE] = new IPV6ColonEnd();
        }

        $IPv6 = substr($addressLiteral, 5);
        //Daniel Marschall's new IPv6 testing strategy
        $matchesIP = explode(':', $IPv6);
        $groupCount = count($matchesIP);
        $colons = strpos($IPv6, '::');

        // Any group that is not 0-4 hex digits is flagged as a bad character.
        if (count(preg_grep('/^[0-9A-Fa-f]{0,4}$/', $matchesIP, PREG_GREP_INVERT)) !== 0) {
            $this->warnings[IPV6BadChar::CODE] = new IPV6BadChar();
        }

        if ($colons === false) {
            // We need exactly the right number of groups
            if ($groupCount !== $maxGroups) {
                $this->warnings[IPV6GroupCount::CODE] = new IPV6GroupCount();
            }
            return;
        }

        // First and last occurrence differ, so '::' appears more than once.
        if ($colons !== strrpos($IPv6, '::')) {
            $this->warnings[IPV6DoubleColon::CODE] = new IPV6DoubleColon();
            return;
        }

        if ($colons === 0 || $colons === (strlen($IPv6) - 2)) {
            // RFC 4291 allows :: at the start or end of an address
            //with 7 other groups in addition
            ++$maxGroups;
        }

        if ($groupCount > $maxGroups) {
            $this->warnings[IPV6MaxGroups::CODE] = new IPV6MaxGroups();
        } elseif ($groupCount === $maxGroups) {
            $this->warnings[IPV6Deprecated::CODE] = new IPV6Deprecated();
        }
    }

    /**
     * Consumes tokens until the lexer reports a null token type and returns
     * the concatenated token values.
     *
     * Each iteration validates the current token, tracks comment parentheses,
     * handles a bracketed domain literal and folding white space, and checks
     * the length of each dot-separated label.
     *
     * @return string
     *
     * @throws UnopenedComment When a closing parenthesis has no matching opener.
     * @throws CharNotAllowed  When the token following a consumed token is S_SP.
     */
    protected function doParseDomainPart()
    {
        $domain = '';
        $label = '';
        $openedParenthesis = 0;
        do {
            $prev = $this->lexer->getPrevious();

            $this->checkNotAllowedChars($this->lexer->token);

            if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
                $this->parseComments();
                $openedParenthesis += $this->getOpenedParenthesis();
                $this->lexer->moveNext();
                $tmpPrev = $this->lexer->getPrevious();
                // The comment was closed right before the new current token.
                if ($tmpPrev['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
                    $openedParenthesis--;
                }
            }
            if ($this->lexer->token['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
                if ($openedParenthesis === 0) {
                    throw new UnopenedComment();
                } else {
                    $openedParenthesis--;
                }
            }

            $this->checkConsecutiveDots();
            $this->checkDomainPartExceptions($prev);

            if ($this->hasBrackets()) {
                $this->parseDomainLiteral();
            }

            // A dot ends the current label; anything else extends it.
            if ($this->lexer->token['type'] === EmailLexer::S_DOT) {
                $this->checkLabelLength($label);
                $label = '';
            } else {
                $label .= $this->lexer->token['value'];
            }

            if ($this->isFWS()) {
                $this->parseFWS();
            }

            $domain .= $this->lexer->token['value'];
            $this->lexer->moveNext();
            if ($this->lexer->token['type'] === EmailLexer::S_SP) {
                throw new CharNotAllowed();
            }
        } while (null !== $this->lexer->token['type']);

        // The last label is not followed by a dot, so check it here.
        $this->checkLabelLength($label);

        return $domain;
    }

    /**
     * Rejects backslash and slash tokens.
     *
     * @param array $token Lexer token; only its 'type' entry is read.
     *
     * @throws CharNotAllowed
     */
    private function checkNotAllowedChars(array $token)
    {
        $notAllowed = [EmailLexer::S_BACKSLASH => true, EmailLexer::S_SLASH=> true];
        if (isset($notAllowed[$token['type']])) {
            throw new CharNotAllowed();
        }
    }

    /**
     * Records an IPV6ColonStart warning when applicable, then delegates to
     * doParseDomainLiteral().
     *
     * The warning is recorded when the next token is a colon, or when the next
     * token is the IPv6 tag and the token after it is a double colon. That
     * second look-ahead runs on a clone, so the real lexer is not advanced.
     *
     * @return string|false
     */
    protected function parseDomainLiteral()
    {
        if ($this->lexer->isNextToken(EmailLexer::S_COLON)) {
            $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
        }
        if ($this->lexer->isNextToken(EmailLexer::S_IPV6TAG)) {
            $lexer = clone $this->lexer;
            $lexer->moveNext();
            if ($lexer->isNextToken(EmailLexer::S_DOUBLECOLON)) {
                $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
            }
        }

        return $this->doParseDomainLiteral();
    }

    /**
     * Consumes the tokens of a domain literal and classifies the result.
     *
     * Token values are accumulated until an S_CLOSEQBRACKET token is reached
     * or moveNext() returns a falsy value. '[' characters are then stripped
     * and the text is passed through checkIPV4Tag(). Without an IPv6 tag a
     * DomainLiteral warning is recorded; with one, an AddressLiteral warning
     * is recorded and checkIPV6Tag() runs.
     *
     * @return string|false The processed literal, or false when checkIPV4Tag() returns false.
     *
     * @throws ExpectingDTEXT When the current token is C_NUL or the next token opens a bracket.
     * @throws CRNoLF         When the next token is a CR.
     */
    protected function doParseDomainLiteral()
    {
        $IPv6TAG = false;
        $addressLiteral = '';
        do {
            if ($this->lexer->token['type'] === EmailLexer::C_NUL) {
                throw new ExpectingDTEXT();
            }

            if ($this->lexer->token['type'] === EmailLexer::INVALID ||
                $this->lexer->token['type'] === EmailLexer::C_DEL   ||
                $this->lexer->token['type'] === EmailLexer::S_LF
            ) {
                $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
            }

            if ($this->lexer->isNextTokenAny(array(EmailLexer::S_OPENQBRACKET, EmailLexer::S_OPENBRACKET))) {
                throw new ExpectingDTEXT();
            }

            // NOTE(review): the third array element is a boolean expression, not a
            // token constant. It looks like a misplaced "current token is CRLF"
            // condition; confirm the intent before changing it.
            if ($this->lexer->isNextTokenAny(
                array(EmailLexer::S_HTAB, EmailLexer::S_SP, $this->lexer->token['type'] === EmailLexer::CRLF)
            )) {
                $this->warnings[CFWSWithFWS::CODE] = new CFWSWithFWS();
                $this->parseFWS();
            }

            if ($this->lexer->isNextToken(EmailLexer::S_CR)) {
                throw new CRNoLF();
            }

            if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH) {
                $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
                $addressLiteral .= $this->lexer->token['value'];
                $this->lexer->moveNext();
                $this->validateQuotedPair();
            }
            if ($this->lexer->token['type'] === EmailLexer::S_IPV6TAG) {
                $IPv6TAG = true;
            }
            // NOTE(review): hasBrackets() searches for S_CLOSEBRACKET, while this
            // loop stops on S_CLOSEQBRACKET; confirm which constant is intended.
            if ($this->lexer->token['type'] === EmailLexer::S_CLOSEQBRACKET) {
                break;
            }

            $addressLiteral .= $this->lexer->token['value'];

        } while ($this->lexer->moveNext());

        $addressLiteral = str_replace('[', '', $addressLiteral);
        $addressLiteral = $this->checkIPV4Tag($addressLiteral);

        if (false === $addressLiteral) {
            return $addressLiteral;
        }

        if (!$IPv6TAG) {
            $this->warnings[DomainLiteral::CODE] = new DomainLiteral();
            return $addressLiteral;
        }

        $this->warnings[AddressLiteral::CODE] = new AddressLiteral();

        $this->checkIPV6Tag($addressLiteral);

        return $addressLiteral;
    }

    /**
     * Detects a dotted-quad IPv4 address at the end of the literal.
     *
     * If the literal consists of the IPv4 address alone (match at offset 0),
     * an AddressLiteral warning is recorded and false is returned. If text
     * precedes the match, the IPv4 part is replaced with '0:0'. If there is
     * no match, the literal is returned unchanged.
     *
     * @param string $addressLiteral
     *
     * @return string|false
     */
    protected function checkIPV4Tag($addressLiteral)
    {
        $matchesIP  = array();

        // Extract IPv4 part from the end of the address-literal (if there is one)
        if (preg_match(
            '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
            $addressLiteral,
            $matchesIP
        ) > 0
        ) {
            $index = strrpos($addressLiteral, $matchesIP[0]);
            if ($index === 0) {
                $this->warnings[AddressLiteral::CODE] = new AddressLiteral();
                return false;
            }
            // Convert IPv4 part to IPv6 format for further testing
            $addressLiteral = substr($addressLiteral, 0, (int) $index) . '0:0';
        }

        return $addressLiteral;
    }

    /**
     * Throws for current-token types that are not accepted inside a domain.
     *
     * @param array $prev Token that preceded the current one; only 'type' is read.
     *
     * @throws ExpectingATEXT For quote, backtick, semicolon, '<' and '>' tokens, for
     *                        S_OPENQBRACKET not directly after '@', and for a
     *                        backslash followed by a GENERIC token.
     * @throws CommaInDomain  For a comma token.
     * @throws ConsecutiveAt  For an '@' token.
     * @throws DomainHyphened For a hyphen immediately followed by a dot.
     */
    protected function checkDomainPartExceptions(array $prev)
    {
        $invalidDomainTokens = array(
            EmailLexer::S_DQUOTE => true,
            EmailLexer::S_SQUOTE => true,
            EmailLexer::S_BACKTICK => true,
            EmailLexer::S_SEMICOLON => true,
            EmailLexer::S_GREATERTHAN => true,
            EmailLexer::S_LOWERTHAN => true,
        );

        if (isset($invalidDomainTokens[$this->lexer->token['type']])) {
            throw new ExpectingATEXT();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_COMMA) {
            throw new CommaInDomain();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_AT) {
            throw new ConsecutiveAt();
        }

        // NOTE(review): hasBrackets() tests S_OPENBRACKET, this tests
        // S_OPENQBRACKET; confirm which constant is intended here.
        if ($this->lexer->token['type'] === EmailLexer::S_OPENQBRACKET && $prev['type'] !== EmailLexer::S_AT) {
            throw new ExpectingATEXT();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN && $this->lexer->isNextToken(EmailLexer::S_DOT)) {
            throw new DomainHyphened();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH
            && $this->lexer->isNextToken(EmailLexer::GENERIC)) {
            throw new ExpectingATEXT();
        }
    }

    /**
     * Tells whether the current token opens a bracketed domain literal.
     *
     * When it does, the lexer is asked to find the matching S_CLOSEBRACKET; a
     * RuntimeException from that search is converted into a parser exception.
     *
     * @return bool
     *
     * @throws ExpectingDomainLiteralClose When no closing bracket is found.
     */
    protected function hasBrackets()
    {
        if ($this->lexer->token['type'] !== EmailLexer::S_OPENBRACKET) {
            return false;
        }

        try {
            $this->lexer->find(EmailLexer::S_CLOSEBRACKET);
        } catch (\RuntimeException $e) {
            throw new ExpectingDomainLiteralClose();
        }

        return true;
    }

    /**
     * Records a LabelTooLong warning when the label exceeds the allowed length.
     *
     * @param string $label
     */
    protected function checkLabelLength($label)
    {
        if ($this->isLabelTooLong($label)) {
            $this->warnings[LabelTooLong::CODE] = new LabelTooLong();
        }
    }

    /**
     * Decides whether a single label is too long.
     *
     * A label containing any byte outside \x00-\x7F is run through
     * idn_to_ascii() (UTS #46) and judged by the IDNA_ERROR_LABEL_TOO_LONG
     * flag. A pure-ASCII label is compared by byte length against
     * LABEL_MAX_LENGTH.
     *
     * @param string $label
     * @return bool
     */
    private function isLabelTooLong($label)
    {
        if (preg_match('/[^\x00-\x7F]/', $label)) {
            // Only the by-reference $idnaInfo is needed; the converted name is discarded.
            idn_to_ascii($label, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46, $idnaInfo);

            return (bool) ($idnaInfo['errors'] & IDNA_ERROR_LABEL_TOO_LONG);
        }

        return strlen($label) > self::LABEL_MAX_LENGTH;
    }

    /**
     * Skips over a comment that appears at the start of the domain.
     *
     * The lexer is advanced until its next token is a closing parenthesis,
     * calling warnEscaping() at each step, and then moved onto that
     * parenthesis.
     *
     * @throws ExpectingATEXT When a dot follows the closing parenthesis.
     */
    protected function parseDomainComments()
    {
        // Return value is ignored; presumably this throws on an unclosed comment
        // (defined outside this file; confirm).
        $this->isUnclosedComment();
        while (!$this->lexer->isNextToken(EmailLexer::S_CLOSEPARENTHESIS)) {
            $this->warnEscaping();
            $this->lexer->moveNext();
        }

        $this->lexer->moveNext();
        if ($this->lexer->isNextToken(EmailLexer::S_DOT)) {
            throw new ExpectingATEXT();
        }
    }

    /**
     * Adds a TLD warning when a DomainLiteral warning has already been recorded.
     */
    protected function addTLDWarnings()
    {
        // !empty() keeps the original truthiness test but does not raise an
        // "undefined index/array key" diagnostic when the warning is absent.
        if (!empty($this->warnings[DomainLiteral::CODE])) {
            $this->warnings[TLD::CODE] = new TLD();
        }
    }
}