<?php

namespace Egulias\EmailValidator\Parser;

use Egulias\EmailValidator\EmailLexer;
use Egulias\EmailValidator\Exception\CharNotAllowed;
use Egulias\EmailValidator\Exception\CommaInDomain;
use Egulias\EmailValidator\Exception\ConsecutiveAt;
use Egulias\EmailValidator\Exception\CRLFAtTheEnd;
use Egulias\EmailValidator\Exception\CRNoLF;
use Egulias\EmailValidator\Exception\DomainHyphened;
use Egulias\EmailValidator\Exception\DotAtEnd;
use Egulias\EmailValidator\Exception\DotAtStart;
use Egulias\EmailValidator\Exception\ExpectingATEXT;
use Egulias\EmailValidator\Exception\ExpectingDomainLiteralClose;
use Egulias\EmailValidator\Exception\ExpectingDTEXT;
use Egulias\EmailValidator\Exception\NoDomainPart;
use Egulias\EmailValidator\Exception\UnopenedComment;
use Egulias\EmailValidator\Warning\AddressLiteral;
use Egulias\EmailValidator\Warning\CFWSWithFWS;
use Egulias\EmailValidator\Warning\DeprecatedComment;
use Egulias\EmailValidator\Warning\DomainLiteral;
use Egulias\EmailValidator\Warning\DomainTooLong;
use Egulias\EmailValidator\Warning\IPV6BadChar;
use Egulias\EmailValidator\Warning\IPV6ColonEnd;
use Egulias\EmailValidator\Warning\IPV6ColonStart;
use Egulias\EmailValidator\Warning\IPV6Deprecated;
use Egulias\EmailValidator\Warning\IPV6DoubleColon;
use Egulias\EmailValidator\Warning\IPV6GroupCount;
use Egulias\EmailValidator\Warning\IPV6MaxGroups;
use Egulias\EmailValidator\Warning\LabelTooLong;
use Egulias\EmailValidator\Warning\ObsoleteDTEXT;
use Egulias\EmailValidator\Warning\TLD;

/**
 * Parser for the domain part of an address.
 *
 * Walks the tokens exposed by the lexer, throws an exception for hard
 * syntax errors and records softer problems in $this->warnings, keyed by
 * each warning's CODE constant.
 */
class DomainPart extends Parser
{
    /**
     * Domains longer than this many bytes trigger a DomainTooLong warning.
     */
    const DOMAIN_MAX_LENGTH = 254;

    /**
     * Domain text accumulated by the last successful call to parse().
     *
     * @var string
     */
    protected $domainPart = '';

    /**
     * Parses the domain from the lexer's current position.
     *
     * The lexer is advanced by one token first, then the start-of-domain
     * checks run, then the body of the domain is consumed. The final checks
     * look at the last token seen before the end of input.
     *
     * @param string $domainPart Not read by this implementation; the tokens
     *                           come from $this->lexer.
     *
     * @throws DotAtEnd       When the last token is a dot.
     * @throws DomainHyphened When the last token is a hyphen.
     * @throws CRLFAtTheEnd   When the last token is a carriage return.
     */
    public function parse($domainPart)
    {
        $this->lexer->moveNext();

        $this->performDomainStartChecks();

        $domain = $this->doParseDomainPart();

        $prev = $this->lexer->getPrevious();
        $length = strlen($domain);

        if ($prev['type'] === EmailLexer::S_DOT) {
            throw new DotAtEnd();
        }
        if ($prev['type'] === EmailLexer::S_HYPHEN) {
            throw new DomainHyphened();
        }
        if ($length > self::DOMAIN_MAX_LENGTH) {
            $this->warnings[DomainTooLong::CODE] = new DomainTooLong();
        }
        if ($prev['type'] === EmailLexer::S_CR) {
            throw new CRLFAtTheEnd();
        }
        $this->domainPart = $domain;
    }

    /**
     * Validates the first token of the domain and handles a leading comment.
     *
     * A domain starting with an opening parenthesis records a
     * DeprecatedComment warning and has that comment consumed.
     */
    private function performDomainStartChecks()
    {
        $this->checkInvalidTokensAfterAT();
        $this->checkEmptyDomain();

        if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
            $this->warnings[DeprecatedComment::CODE] = new DeprecatedComment();
            $this->parseDomainComments();
        }
    }

    /**
     * Rejects an empty domain.
     *
     * The domain is considered missing when the current token is S_EMPTY,
     * or when it is a space that is not followed by a GENERIC token.
     *
     * @throws NoDomainPart
     */
    private function checkEmptyDomain()
    {
        $thereIsNoDomain = $this->lexer->token['type'] === EmailLexer::S_EMPTY ||
            ($this->lexer->token['type'] === EmailLexer::S_SP &&
            !$this->lexer->isNextToken(EmailLexer::GENERIC));

        if ($thereIsNoDomain) {
            throw new NoDomainPart();
        }
    }

    /**
     * Rejects a domain whose first token is a dot or a hyphen.
     *
     * @throws DotAtStart
     * @throws DomainHyphened
     */
    private function checkInvalidTokensAfterAT()
    {
        if ($this->lexer->token['type'] === EmailLexer::S_DOT) {
            throw new DotAtStart();
        }
        if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN) {
            throw new DomainHyphened();
        }
    }

    /**
     * Returns the domain stored by the last successful parse().
     *
     * @return string
     */
    public function getDomainPart()
    {
        return $this->domainPart;
    }

    /**
     * Records warnings about the IPv6 portion of an address literal.
     *
     * Nothing is thrown and nothing is returned; every finding is stored in
     * $this->warnings.
     *
     * @param string $addressLiteral Literal text; its first five bytes are
     *                               skipped (presumably the "IPv6:" tag).
     * @param int    $maxGroups      Number of colon-separated groups expected
     *                               when the address has no "::" shorthand.
     */
    public function checkIPV6Tag($addressLiteral, $maxGroups = 8)
    {
        $prev = $this->lexer->getPrevious();
        if ($prev['type'] === EmailLexer::S_COLON) {
            $this->warnings[IPV6ColonEnd::CODE] = new IPV6ColonEnd();
        }

        $IPv6 = substr($addressLiteral, 5);
        //Daniel Marschall's new IPv6 testing strategy
        $matchesIP = explode(':', $IPv6);
        $groupCount = count($matchesIP);
        $colons = strpos($IPv6, '::');

        // Any group that is not 0-4 hexadecimal digits is a bad character.
        if (count(preg_grep('/^[0-9A-Fa-f]{0,4}$/', $matchesIP, PREG_GREP_INVERT)) !== 0) {
            $this->warnings[IPV6BadChar::CODE] = new IPV6BadChar();
        }

        if ($colons === false) {
            // We need exactly the right number of groups
            if ($groupCount !== $maxGroups) {
                $this->warnings[IPV6GroupCount::CODE] = new IPV6GroupCount();
            }
            return;
        }

        // First and last "::" positions differ: more than one "::" present.
        if ($colons !== strrpos($IPv6, '::')) {
            $this->warnings[IPV6DoubleColon::CODE] = new IPV6DoubleColon();
            return;
        }

        if ($colons === 0 || $colons === (strlen($IPv6) - 2)) {
            // RFC 4291 allows :: at the start or end of an address
            //with 7 other groups in addition
            ++$maxGroups;
        }

        if ($groupCount > $maxGroups) {
            $this->warnings[IPV6MaxGroups::CODE] = new IPV6MaxGroups();
        } elseif ($groupCount === $maxGroups) {
            $this->warnings[IPV6Deprecated::CODE] = new IPV6Deprecated();
        }
    }

    /**
     * Consumes tokens until the lexer is exhausted and builds the domain.
     *
     * Each iteration validates the current token, delegates comments,
     * domain literals and folding white space to their handlers, then
     * appends the current token value to the result.
     *
     * @return string The concatenated token values.
     *
     * @throws UnopenedComment When a closing parenthesis has no matching
     *                         opening one.
     */
    protected function doParseDomainPart()
    {
        $domain = '';
        $openedParenthesis = 0;
        do {
            // Token preceding the current one, captured before any handler
            // below advances the lexer.
            $prev = $this->lexer->getPrevious();

            $this->checkNotAllowedChars($this->lexer->token);

            if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
                $this->parseComments();
                $openedParenthesis += $this->getOpenedParenthesis();
                $this->lexer->moveNext();
                $tmpPrev = $this->lexer->getPrevious();
                // The token just stepped over closed a comment.
                if ($tmpPrev['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
                    $openedParenthesis--;
                }
            }
            if ($this->lexer->token['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
                if ($openedParenthesis === 0) {
                    throw new UnopenedComment();
                }
                $openedParenthesis--;
            }

            $this->checkConsecutiveDots();
            $this->checkDomainPartExceptions($prev);

            if ($this->hasBrackets()) {
                $this->parseDomainLiteral();
            }

            $this->checkLabelLength($prev);

            if ($this->isFWS()) {
                $this->parseFWS();
            }

            $domain .= $this->lexer->token['value'];
            $this->lexer->moveNext();
        } while (null !== $this->lexer->token['type']);

        return $domain;
    }

    /**
     * Rejects backslash and slash tokens.
     *
     * @param array $token Lexer token; only its 'type' entry is read.
     *
     * @throws CharNotAllowed
     */
    private function checkNotAllowedChars(array $token)
    {
        $notAllowed = [EmailLexer::S_BACKSLASH => true, EmailLexer::S_SLASH => true];
        if (isset($notAllowed[$token['type']])) {
            throw new CharNotAllowed();
        }
    }

    /**
     * Records IPv6 leading-colon warnings, then parses the domain literal.
     *
     * An IPV6ColonStart warning is stored when the next token is a colon, or
     * when the next token is the IPv6 tag and the token after it is a double
     * colon. A cloned lexer is used for that second look-ahead, so the real
     * lexer position is left untouched.
     *
     * @return string|false Result of doParseDomainLiteral().
     */
    protected function parseDomainLiteral()
    {
        if ($this->lexer->isNextToken(EmailLexer::S_COLON)) {
            $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
        }
        if ($this->lexer->isNextToken(EmailLexer::S_IPV6TAG)) {
            $lexer = clone $this->lexer;
            $lexer->moveNext();
            if ($lexer->isNextToken(EmailLexer::S_DOUBLECOLON)) {
                $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
            }
        }

        return $this->doParseDomainLiteral();
    }

    /**
     * Consumes the tokens of a domain literal and classifies its content.
     *
     * @return string|false The literal text with '[' removed and any trailing
     *                      IPv4 address rewritten by checkIPV4Tag(), or false
     *                      when checkIPV4Tag() returns false.
     *
     * @throws ExpectingDTEXT When the current token is NUL or the next token
     *                        is an opening bracket or opening curly bracket.
     * @throws CRNoLF         When the next token is a carriage return.
     */
    protected function doParseDomainLiteral()
    {
        $IPv6TAG = false;
        $addressLiteral = '';
        do {
            if ($this->lexer->token['type'] === EmailLexer::C_NUL) {
                throw new ExpectingDTEXT();
            }

            if ($this->lexer->token['type'] === EmailLexer::INVALID ||
                $this->lexer->token['type'] === EmailLexer::C_DEL ||
                $this->lexer->token['type'] === EmailLexer::S_LF
            ) {
                $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
            }

            if ($this->lexer->isNextTokenAny([EmailLexer::S_OPENQBRACKET, EmailLexer::S_OPENBRACKET])) {
                throw new ExpectingDTEXT();
            }

            // Folding white space may begin with a tab, a space or a CRLF
            // token. The CRLF token type itself must be listed so it can be
            // matched as the next token.
            if ($this->lexer->isNextTokenAny([EmailLexer::S_HTAB, EmailLexer::S_SP, EmailLexer::CRLF])) {
                $this->warnings[CFWSWithFWS::CODE] = new CFWSWithFWS();
                $this->parseFWS();
            }

            if ($this->lexer->isNextToken(EmailLexer::S_CR)) {
                throw new CRNoLF();
            }

            if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH) {
                $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
                $addressLiteral .= $this->lexer->token['value'];
                $this->lexer->moveNext();
                $this->validateQuotedPair();
            }
            if ($this->lexer->token['type'] === EmailLexer::S_IPV6TAG) {
                $IPv6TAG = true;
            }
            // NOTE(review): hasBrackets() searches for S_CLOSEBRACKET while
            // this loop stops on S_CLOSEQBRACKET - confirm which token is
            // intended before changing either.
            if ($this->lexer->token['type'] === EmailLexer::S_CLOSEQBRACKET) {
                break;
            }

            $addressLiteral .= $this->lexer->token['value'];
        } while ($this->lexer->moveNext());

        $addressLiteral = str_replace('[', '', $addressLiteral);
        $addressLiteral = $this->checkIPV4Tag($addressLiteral);

        if (false === $addressLiteral) {
            return $addressLiteral;
        }

        if (!$IPv6TAG) {
            $this->warnings[DomainLiteral::CODE] = new DomainLiteral();
            return $addressLiteral;
        }

        $this->warnings[AddressLiteral::CODE] = new AddressLiteral();

        $this->checkIPV6Tag($addressLiteral);

        return $addressLiteral;
    }

    /**
     * Detects an IPv4 address at the end of an address literal.
     *
     * @param string $addressLiteral
     *
     * @return string|false False (after recording an AddressLiteral warning)
     *                      when the IPv4 match starts at offset 0; otherwise
     *                      the literal, with a trailing IPv4 address replaced
     *                      by '0:0' when one was found.
     */
    protected function checkIPV4Tag($addressLiteral)
    {
        $matchesIP = [];

        // Extract IPv4 part from the end of the address-literal (if there is one)
        if (preg_match(
            '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
            $addressLiteral,
            $matchesIP
        ) > 0
        ) {
            $index = strrpos($addressLiteral, $matchesIP[0]);
            if ($index === 0) {
                $this->warnings[AddressLiteral::CODE] = new AddressLiteral();
                return false;
            }
            // Convert IPv4 part to IPv6 format for further testing
            $addressLiteral = substr($addressLiteral, 0, (int) $index) . '0:0';
        }

        return $addressLiteral;
    }

    /**
     * Throws for tokens that may not appear at the current domain position.
     *
     * @param array $prev Token preceding the current one; only 'type' is read.
     *
     * @throws ExpectingATEXT For a double quote, semicolon, '<' or '>' token,
     *                        for an opening curly bracket not directly after
     *                        '@', and for a backslash followed by GENERIC.
     * @throws CommaInDomain  For a comma token.
     * @throws ConsecutiveAt  For an '@' token.
     * @throws DomainHyphened For a hyphen directly followed by a dot.
     */
    protected function checkDomainPartExceptions(array $prev)
    {
        $invalidDomainTokens = [
            EmailLexer::S_DQUOTE => true,
            EmailLexer::S_SEMICOLON => true,
            EmailLexer::S_GREATERTHAN => true,
            EmailLexer::S_LOWERTHAN => true,
        ];

        if (isset($invalidDomainTokens[$this->lexer->token['type']])) {
            throw new ExpectingATEXT();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_COMMA) {
            throw new CommaInDomain();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_AT) {
            throw new ConsecutiveAt();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_OPENQBRACKET && $prev['type'] !== EmailLexer::S_AT) {
            throw new ExpectingATEXT();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN && $this->lexer->isNextToken(EmailLexer::S_DOT)) {
            throw new DomainHyphened();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH
            && $this->lexer->isNextToken(EmailLexer::GENERIC)) {
            throw new ExpectingATEXT();
        }
    }

    /**
     * Tells whether the current token opens a domain literal.
     *
     * @return bool False unless the current token is an opening bracket.
     *
     * @throws ExpectingDomainLiteralClose When the lexer's find() raises a
     *                                     RuntimeException while looking for
     *                                     the closing bracket.
     */
    protected function hasBrackets()
    {
        if ($this->lexer->token['type'] !== EmailLexer::S_OPENBRACKET) {
            return false;
        }

        try {
            $this->lexer->find(EmailLexer::S_CLOSEBRACKET);
        } catch (\RuntimeException $e) {
            throw new ExpectingDomainLiteralClose();
        }

        return true;
    }

    /**
     * Records a LabelTooLong warning for a label longer than 63 bytes.
     *
     * The check fires on a dot token whose predecessor is a GENERIC token.
     *
     * @param array $prev Token preceding the current one.
     */
    protected function checkLabelLength(array $prev)
    {
        if ($this->lexer->token['type'] === EmailLexer::S_DOT &&
            $prev['type'] === EmailLexer::GENERIC &&
            strlen($prev['value']) > 63
        ) {
            $this->warnings[LabelTooLong::CODE] = new LabelTooLong();
        }
    }

    /**
     * Skips over a comment located at the start of the domain.
     *
     * Tokens are consumed until the next token is a closing parenthesis,
     * which is then stepped onto.
     *
     * @throws ExpectingATEXT When a dot directly follows the comment.
     */
    protected function parseDomainComments()
    {
        $this->isUnclosedComment();
        while (!$this->lexer->isNextToken(EmailLexer::S_CLOSEPARENTHESIS)) {
            $this->warnEscaping();
            $this->lexer->moveNext();
        }

        $this->lexer->moveNext();
        if ($this->lexer->isNextToken(EmailLexer::S_DOT)) {
            throw new ExpectingATEXT();
        }
    }

    /**
     * Adds a TLD warning when a DomainLiteral warning has been recorded.
     *
     * isset() is used so that a missing DomainLiteral entry does not raise
     * an undefined-index notice.
     */
    protected function addTLDWarnings()
    {
        if (isset($this->warnings[DomainLiteral::CODE])) {
            $this->warnings[TLD::CODE] = new TLD();
        }
    }
}