<?php

namespace Egulias\EmailValidator;

use Doctrine\Common\Lexer\AbstractLexer;

/**
 * Lexer that splits an e-mail address into typed tokens.
 *
 * Every token produced by the parent lexer is classified by getType() into
 * one of the integer constants declared below. The lexer additionally keeps
 * track of the previously consumed token and of whether any token was
 * classified as INVALID.
 */
class EmailLexer extends AbstractLexer
{
    // Token type identifiers.
    // Some of them coincide with the ASCII code of the character they stand
    // for (e.g. S_AT = 64, S_DOT = 46); the ones from 261 upwards are plain
    // identifiers with no relation to ASCII.
    const C_DEL              = 127;
    const C_NUL              = 0;
    const S_AT               = 64;
    const S_BACKSLASH        = 92;
    const S_DOT              = 46;
    const S_DQUOTE           = 34;
    const S_SQUOTE           = 39;
    const S_BACKTICK         = 96;
    // NOTE(review): 49 is the ASCII code of '1', not of '(' (which is 40).
    // The value is kept because it is part of the public constant interface
    // and is only ever compared by name.
    const S_OPENPARENTHESIS  = 49;
    const S_CLOSEPARENTHESIS = 261;
    const S_OPENBRACKET      = 262;
    const S_CLOSEBRACKET     = 263;
    const S_HYPHEN           = 264;
    const S_COLON            = 265;
    const S_DOUBLECOLON      = 266;
    const S_SP               = 267;
    const S_HTAB             = 268;
    const S_CR               = 269;
    const S_LF               = 270;
    const S_IPV6TAG          = 271;
    const S_LOWERTHAN        = 272;
    const S_GREATERTHAN      = 273;
    const S_COMMA            = 274;
    const S_SEMICOLON        = 275;
    const S_OPENQBRACKET     = 276;
    const S_CLOSEQBRACKET    = 277;
    const S_SLASH            = 278;
    const S_EMPTY            = null;
    const GENERIC            = 300;
    const CRLF               = 301;
    const INVALID            = 302;
    const ASCII_INVALID_FROM = 127;
    const ASCII_INVALID_TO   = 199;

    /**
     * Map from token text to token type.
     *
     * US-ASCII visible characters not valid for atext
     * (@link http://tools.ietf.org/html/rfc5322#section-3.2.3), plus
     * whitespace, CRLF, the "IPv6" tag and the NUL byte.
     *
     * Note that isValid() looks entries up with isset(), which reports a
     * null value as "not set"; the '' => S_EMPTY entry is therefore never
     * returned by getType().
     *
     * @var array
     */
    protected $charValue = [
        '('    => self::S_OPENPARENTHESIS,
        ')'    => self::S_CLOSEPARENTHESIS,
        '<'    => self::S_LOWERTHAN,
        '>'    => self::S_GREATERTHAN,
        '['    => self::S_OPENBRACKET,
        ']'    => self::S_CLOSEBRACKET,
        ':'    => self::S_COLON,
        ';'    => self::S_SEMICOLON,
        '@'    => self::S_AT,
        '\\'   => self::S_BACKSLASH,
        '/'    => self::S_SLASH,
        ','    => self::S_COMMA,
        '.'    => self::S_DOT,
        "'"    => self::S_SQUOTE,
        "`"    => self::S_BACKTICK,
        '"'    => self::S_DQUOTE,
        '-'    => self::S_HYPHEN,
        '::'   => self::S_DOUBLECOLON,
        ' '    => self::S_SP,
        "\t"   => self::S_HTAB,
        "\r"   => self::S_CR,
        "\n"   => self::S_LF,
        "\r\n" => self::CRLF,
        'IPv6' => self::S_IPV6TAG,
        '{'    => self::S_OPENQBRACKET,
        '}'    => self::S_CLOSEQBRACKET,
        ''     => self::S_EMPTY,
        // Must be double-quoted: '\0' in single quotes is the two-character
        // string backslash + zero, not the NUL byte.
        "\0"   => self::C_NUL,
    ];

    /**
     * Set to true as soon as getType() classifies a token as INVALID.
     *
     * @var bool
     */
    protected $hasInvalidTokens = false;

    /**
     * The token that was current before the last call to moveNext().
     *
     * @var array
     *
     * @psalm-var array{value:string, type:null|int, position:int}|array<empty, empty>
     */
    protected $previous = [];

    /**
     * The last matched/seen token.
     *
     * @var array
     *
     * @psalm-var array{value:string, type:null|int, position:int}
     */
    public $token;

    /**
     * The next token in the input.
     *
     * @var array|null
     */
    public $lookahead;

    /**
     * Placeholder used for $token / $previous when there is no real token.
     *
     * @psalm-var array{value:'', type:null, position:0}
     */
    private static $nullToken = [
        'value'    => '',
        'type'     => null,
        'position' => 0,
    ];

    /**
     * Starts with the placeholder token as both current and previous token
     * and with no lookahead.
     */
    public function __construct()
    {
        $this->previous = $this->token = self::$nullToken;
        $this->lookahead = null;
    }

    /**
     * Clears the invalid-token flag, resets the parent lexer and restores the
     * placeholder token as both current and previous token.
     *
     * @return void
     */
    public function reset()
    {
        $this->hasInvalidTokens = false;
        parent::reset();
        // Re-seed after the parent reset so both properties always hold an array.
        $this->previous = $this->token = self::$nullToken;
    }

    /**
     * Tells whether any token seen so far was classified as INVALID.
     *
     * @return bool
     */
    public function hasInvalidTokens()
    {
        return $this->hasInvalidTokens;
    }

    /**
     * Checks that a token of the given type lies ahead in the input.
     *
     * The search runs on a clone, so the position of this lexer is not moved.
     * The method never returns false: a missing token is reported by the
     * exception.
     *
     * @param int $type
     * @throws \UnexpectedValueException when the clone has no lookahead left after skipping
     * @return bool always true
     *
     * @psalm-suppress InvalidScalarArgument
     */
    public function find($type)
    {
        $search = clone $this;
        $search->skipUntil($type);

        if (!$search->lookahead) {
            throw new \UnexpectedValueException($type . ' not found');
        }

        return true;
    }

    /**
     * Returns the token that was current before the last moveNext() call.
     *
     * @return array
     */
    public function getPrevious()
    {
        return $this->previous;
    }

    /**
     * Remembers the current token as the previous one and advances the
     * parent lexer.
     *
     * @return bool the value returned by the parent moveNext()
     */
    public function moveNext()
    {
        $this->previous = $this->token;
        $hasNext = parent::moveNext();
        // Fall back to the placeholder when the parent left $token empty
        // (presumably null at the end of the input), so $token stays an array.
        $this->token = $this->token ?: self::$nullToken;

        return $hasNext;
    }

    /**
     * Lexical catchable patterns.
     *
     * @return string[]
     */
    protected function getCatchablePatterns()
    {
        return [
            '[a-zA-Z_]+[46]?', //ASCII and domain literal
            '[^\x00-\x7F]', //UTF-8
            '[0-9]+',
            '\r\n',
            '::',
            '\s+?',
            '.',
        ];
    }

    /**
     * Lexical non-catchable patterns.
     *
     * @return string[]
     */
    protected function getNonCatchablePatterns()
    {
        return ['[\xA0-\xff]+'];
    }

    /**
     * Retrieve token type.
     *
     * Checks, in order: the NUL byte (C_NUL), a known entry of $charValue,
     * a token containing control characters (INVALID, which also raises the
     * invalid-token flag), and otherwise GENERIC.
     *
     * @param string $value
     * @return int
     */
    protected function getType(&$value)
    {
        if ($this->isNullType($value)) {
            return self::C_NUL;
        }

        if ($this->isValid($value)) {
            return $this->charValue[$value];
        }

        if ($this->isUTF8Invalid($value)) {
            $this->hasInvalidTokens = true;

            return self::INVALID;
        }

        return self::GENERIC;
    }

    /**
     * Tells whether the value has a non-null entry in $charValue.
     *
     * @param string $value
     *
     * @return bool
     */
    protected function isValid($value)
    {
        return isset($this->charValue[$value]);
    }

    /**
     * Tells whether the value is exactly the NUL byte.
     *
     * @param string $value
     * @return bool
     */
    protected function isNullType($value)
    {
        return $value === "\0";
    }

    /**
     * Tells whether the value contains at least one control character
     * (Unicode category Cc).
     *
     * @param string $value
     * @return bool
     */
    protected function isUTF8Invalid($value)
    {
        // preg_match() yields 1 on a match, 0 on no match and false on error;
        // only an actual match counts as invalid.
        return preg_match('/\p{Cc}+/u', $value) === 1;
    }

    /**
     * Regex modifiers used by the parent lexer: case-insensitive, UTF-8.
     *
     * @return string
     */
    protected function getModifiers()
    {
        return 'iu';
    }
}