<?php

namespace Sabberworm\CSS;

use Sabberworm\CSS\CSSList\CSSList;
use Sabberworm\CSS\CSSList\Document;
use Sabberworm\CSS\CSSList\KeyFrame;
use Sabberworm\CSS\Property\AtRule;
use Sabberworm\CSS\Property\Import;
use Sabberworm\CSS\Property\Charset;
use Sabberworm\CSS\Property\CSSNamespace;
use Sabberworm\CSS\RuleSet\AtRuleSet;
use Sabberworm\CSS\CSSList\AtRuleBlockList;
use Sabberworm\CSS\RuleSet\DeclarationBlock;
use Sabberworm\CSS\Value\CSSFunction;
use Sabberworm\CSS\Value\RuleValueList;
use Sabberworm\CSS\Value\Size;
use Sabberworm\CSS\Value\Color;
use Sabberworm\CSS\Value\URL;
use Sabberworm\CSS\Value\CSSString;
use Sabberworm\CSS\Rule\Rule;
use Sabberworm\CSS\Parsing\UnexpectedTokenException;

/**
 * Parser class parses CSS from text into a data structure.
 *
 * The input is split into an array of characters once, in the constructor;
 * every other method works on that array through a cursor
 * ($iCurrentPosition). parse() is the only public entry point that drives
 * the cursor.
 */
class Parser {

    /** @var array the input text, one array element per character */
    private $aText;
    /** @var int index into $aText of the next character to be read */
    private $iCurrentPosition;
    /** @var Settings */
    private $oParserSettings;
    /** @var string charset handed to the mb_* helpers and to iconv() */
    private $sCharset;
    /** @var int number of elements in $aText; assigned by setCharset() */
    private $iLength;
    /** @var array at-rule names whose body is a list of rule sets rather than a list of declarations */
    private $blockRules;
    /** @var array map of unit length => (lower-cased unit => unit), iterated longest length first */
    private $aSizeUnits;

    /**
     * @param string $sText the CSS source to parse
     * @param Settings|null $oParserSettings parser settings; Settings::create() is used when omitted
     */
    public function __construct($sText, Settings $oParserSettings = null) {
        $this->iCurrentPosition = 0;
        if ($oParserSettings === null) {
            $oParserSettings = Settings::create();
        }
        $this->oParserSettings = $oParserSettings;
        if ($this->oParserSettings->bMultibyteSupport) {
            // -1 means "no limit". Passing null for an int parameter is deprecated as of PHP 8.1.
            // NOTE(review): preg_split() returns false when $sText is not valid UTF-8; that case is not handled here.
            $this->aText = preg_split('//u', $sText, -1, PREG_SPLIT_NO_EMPTY);
        } else {
            if ($sText === '') {
                $this->aText = array();
            } else {
                $this->aText = str_split($sText);
            }
        }
        $this->blockRules = explode('/', AtRule::BLOCK_RULES);

        $this->aSizeUnits = array();
        foreach (explode('/', Size::ABSOLUTE_SIZE_UNITS.'/'.Size::RELATIVE_SIZE_UNITS.'/'.Size::NON_SIZE_UNITS) as $val) {
            $iSize = strlen($val);
            if (!isset($this->aSizeUnits[$iSize])) {
                $this->aSizeUnits[$iSize] = array();
            }
            $this->aSizeUnits[$iSize][strtolower($val)] = $val;
        }
        // Longest units first: parseNumericValue() stops at the first match, so a short unit
        // must never get the chance to shadow a longer unit that starts with the same characters.
        krsort($this->aSizeUnits, SORT_NUMERIC);
    }

    /**
     * Sets the charset used by the string helpers and (re)computes the cached input length.
     *
     * @param string $sCharset
     */
    public function setCharset($sCharset) {
        $this->sCharset = $sCharset;
        $this->iLength = count($this->aText);
    }

    /**
     * @return string the charset currently in effect (null until setCharset() or parse() has been called)
     */
    public function getCharset() {
        return $this->sCharset;
    }

    /**
     * Parses the text given to the constructor.
     *
     * @return Document the parsed document
     */
    public function parse() {
        $this->setCharset($this->oParserSettings->sDefaultCharset);
        $oResult = new Document();
        $this->parseDocument($oResult);
        return $oResult;
    }

    /**
     * Skips leading whitespace/comments and fills $oDocument with the top-level list.
     */
    private function parseDocument(Document $oDocument) {
        $this->consumeWhiteSpace();
        $this->parseList($oDocument, true);
    }

    /**
     * Parses at-rules and declaration blocks into $oList until the closing "}" of the list
     * (nested lists) or the end of the input (root list).
     *
     * @param CSSList $oList the list to append to
     * @param bool $bIsRoot whether this is the document itself rather than a nested block
     *
     * @throws \Exception on a "}" at root level, or when a nested list is not closed before the input ends
     */
    private function parseList(CSSList $oList, $bIsRoot = false) {
        while (!$this->isEnd()) {
            if ($this->comes('@')) {
                $oList->append($this->parseAtRule());
            } elseif ($this->comes('}')) {
                $this->consume('}');
                if ($bIsRoot) {
                    throw new \Exception("Unopened {");
                }
                return;
            } elseif ($this->oParserSettings->bLenientParsing) {
                try {
                    $oList->append($this->parseSelector());
                } catch (UnexpectedTokenException $e) {
                    // Lenient mode: drop the block that could not be parsed and carry on.
                }
            } else {
                $oList->append($this->parseSelector());
            }
            $this->consumeWhiteSpace();
        }
        if (!$bIsRoot) {
            throw new \Exception("Unexpected end of document");
        }
    }

    /**
     * Parses one at-rule, starting at its "@".
     *
     * @import, @charset, @namespace and (vendor-prefixed) @keyframes get dedicated objects. Any other
     * at-rule becomes an AtRuleBlockList when its name is one of $blockRules, and an AtRuleSet otherwise.
     *
     * @return Import|Charset|KeyFrame|CSSNamespace|AtRuleSet|AtRuleBlockList
     *
     * @throws \Exception when a @namespace rule has a prefix or URL of the wrong type
     */
    private function parseAtRule() {
        $this->consume('@');
        $sIdentifier = $this->parseIdentifier();
        $this->consumeWhiteSpace();
        if ($sIdentifier === 'import') {
            $oLocation = $this->parseURLValue();
            $this->consumeWhiteSpace();
            $sMediaQuery = null;
            if (!$this->comes(';')) {
                $sMediaQuery = $this->consumeUntil(';');
            }
            $this->consume(';');
            return new Import($oLocation, $sMediaQuery);
        } elseif ($sIdentifier === 'charset') {
            $sCharset = $this->parseStringValue();
            $this->consumeWhiteSpace();
            $this->consume(';');
            // The declared charset takes effect for the remainder of the parse.
            $this->setCharset($sCharset->getString());
            return new Charset($sCharset);
        } elseif ($this->identifierIs($sIdentifier, 'keyframes')) {
            $oResult = new KeyFrame();
            $oResult->setVendorKeyFrame($sIdentifier);
            $oResult->setAnimationName(trim($this->consumeUntil('{', false, true)));
            $this->consumeWhiteSpace();
            $this->parseList($oResult);
            return $oResult;
        } elseif ($sIdentifier === 'namespace') {
            $sPrefix = null;
            $mUrl = $this->parsePrimitiveValue();
            if (!$this->comes(';')) {
                // Two values were given: the first one is the prefix, the second one the URL.
                $sPrefix = $mUrl;
                $mUrl = $this->parsePrimitiveValue();
            }
            $this->consume(';');
            if ($sPrefix !== null && !is_string($sPrefix)) {
                throw new \Exception('Wrong namespace prefix '.$sPrefix);
            }
            if (!($mUrl instanceof CSSString || $mUrl instanceof URL)) {
                throw new \Exception('Wrong namespace url of invalid type '.$mUrl);
            }
            return new CSSNamespace($mUrl, $sPrefix);
        } else {
            // Unknown other at rule (font-face or such)
            $sArgs = trim($this->consumeUntil('{', false, true));
            $this->consumeWhiteSpace();
            $bUseRuleSet = true;
            foreach ($this->blockRules as $sBlockRuleName) {
                if ($this->identifierIs($sIdentifier, $sBlockRuleName)) {
                    $bUseRuleSet = false;
                    break;
                }
            }
            if ($bUseRuleSet) {
                $oAtRule = new AtRuleSet($sIdentifier, $sArgs);
                $this->parseRuleSet($oAtRule);
            } else {
                $oAtRule = new AtRuleBlockList($sIdentifier, $sArgs);
                $this->parseList($oAtRule);
            }
            return $oAtRule;
        }
    }

    /**
     * Parses an identifier and, optionally, a function call that directly follows it.
     *
     * @param bool $bAllowFunctions when true and a "(" follows, the arguments are parsed and a CSSFunction is returned
     * @param bool $bIgnoreCase when true the identifier is lower-cased
     *
     * @return string|CSSFunction
     *
     * @throws UnexpectedTokenException when no identifier character is found at the current position
     */
    private function parseIdentifier($bAllowFunctions = true, $bIgnoreCase = true) {
        $sResult = $this->parseCharacter(true);
        if ($sResult === null) {
            throw new UnexpectedTokenException($sResult, $this->peek(5), 'identifier');
        }
        $sCharacter = null;
        while (($sCharacter = $this->parseCharacter(true)) !== null) {
            $sResult .= $sCharacter;
        }
        if ($bIgnoreCase) {
            $sResult = $this->strtolower($sResult);
        }
        if ($bAllowFunctions && $this->comes('(')) {
            $this->consume('(');
            $aArguments = $this->parseValue(array('=', ' ', ','));
            $sResult = new CSSFunction($sResult, $aArguments);
            $this->consume(')');
        }
        return $sResult;
    }

    /**
     * Parses a quoted or unquoted string.
     *
     * @return CSSString
     *
     * @throws \Exception when a quoted string contains something parseCharacter() cannot read
     */
    private function parseStringValue() {
        $sBegin = $this->peek();
        $sQuote = null;
        if ($sBegin === "'") {
            $sQuote = "'";
        } elseif ($sBegin === '"') {
            $sQuote = '"';
        }
        if ($sQuote !== null) {
            $this->consume($sQuote);
        }
        $sResult = "";
        $sContent = null;
        if ($sQuote === null) {
            // Unquoted strings end in whitespace or with braces, brackets, parentheses
            while (!preg_match('/[\\s{}()<>\\[\\]]/isu', $this->peek())) {
                $sResult .= $this->parseCharacter(false);
            }
        } else {
            while (!$this->comes($sQuote)) {
                $sContent = $this->parseCharacter(false);
                if ($sContent === null) {
                    throw new \Exception("Non-well-formed quoted string {$this->peek(3)}");
                }
                $sResult .= $sContent;
            }
            $this->consume($sQuote);
        }
        return new CSSString($sResult);
    }

    /**
     * Reads one (possibly backslash-escaped) character.
     *
     * @param bool $bIsForIdentifier when true, only characters allowed in identifiers are accepted
     *
     * @return string|null the character; '' for an escaped line break; null when $bIsForIdentifier
     *                     is true and the next character cannot be part of an identifier
     */
    private function parseCharacter($bIsForIdentifier) {
        if ($this->peek() === '\\') {
            $this->consume('\\');
            // A backslash directly followed by a line break is a line continuation: both are dropped.
            // The line breaks must be double-quoted; '\n' in single quotes is a backslash and an "n".
            if ($this->comes("\r\n")) {
                $this->consume(2);
                return '';
            }
            if ($this->comes("\n") || $this->comes("\r")) {
                $this->consume(1);
                return '';
            }
            if (preg_match('/[0-9a-fA-F]/Su', $this->peek()) === 0) {
                // Not a hex escape: the escaped character stands for itself.
                return $this->consume(1);
            }
            $sUnicode = $this->consumeExpression('/^[0-9a-fA-F]{1,6}/u');
            if ($this->strlen($sUnicode) < 6) {
                // Consume whitespace after incomplete unicode escape
                if (preg_match('/\\s/isSu', $this->peek())) {
                    if ($this->comes("\r\n")) {
                        $this->consume(2);
                    } else {
                        $this->consume(1);
                    }
                }
            }
            // Build the code point as four little-endian bytes and let iconv convert it to the target charset.
            $iUnicode = intval($sUnicode, 16);
            $sUtf32 = "";
            for ($i = 0; $i < 4; ++$i) {
                $sUtf32 .= chr($iUnicode & 0xff);
                $iUnicode = $iUnicode >> 8;
            }
            return iconv('utf-32le', $this->sCharset, $sUtf32);
        }
        if ($bIsForIdentifier) {
            $peek = ord($this->peek());
            // Ranges: a-z A-Z 0-9 - _
            if (($peek >= 97 && $peek <= 122) ||
                ($peek >= 65 && $peek <= 90) ||
                ($peek >= 48 && $peek <= 57) ||
                ($peek === 45) ||
                ($peek === 95) ||
                ($peek > 0xa1)) {
                return $this->consume(1);
            }
        } else {
            return $this->consume(1);
        }
        return null;
    }

    /**
     * Parses a selector and the declaration block that follows it.
     *
     * @return DeclarationBlock
     */
    private function parseSelector() {
        $oResult = new DeclarationBlock();
        $oResult->setSelector($this->consumeUntil('{', false, true));
        $this->consumeWhiteSpace();
        $this->parseRuleSet($oResult);
        return $oResult;
    }

    /**
     * Parses declarations into $oRuleSet up to and including the closing "}".
     *
     * In lenient mode a declaration that fails to parse is skipped up to the next line break,
     * ";" or "}"; if the input ends while skipping, the rule set is silently closed.
     *
     * @param object $oRuleSet receiver of the parsed rules (anything with an addRule() method)
     */
    private function parseRuleSet($oRuleSet) {
        while ($this->comes(';')) {
            $this->consume(';');
            $this->consumeWhiteSpace();
        }
        while (!$this->comes('}')) {
            $oRule = null;
            if ($this->oParserSettings->bLenientParsing) {
                try {
                    $oRule = $this->parseRule();
                } catch (UnexpectedTokenException $e) {
                    try {
                        $sConsume = $this->consumeUntil(array("\n", ";", '}'), true);
                        // We need to "unfind" the matches to the end of the ruleSet as this will be matched later
                        if ($this->streql(substr($sConsume, -1), '}')) {
                            --$this->iCurrentPosition;
                        } else {
                            $this->consumeWhiteSpace();
                            while ($this->comes(';')) {
                                $this->consume(';');
                            }
                        }
                    } catch (UnexpectedTokenException $e) {
                        // We've reached the end of the document. Just close the RuleSet.
                        return;
                    }
                }
            } else {
                $oRule = $this->parseRule();
            }
            if ($oRule) {
                $oRuleSet->addRule($oRule);
            }
            $this->consumeWhiteSpace();
        }
        $this->consume('}');
    }

    /**
     * Parses one declaration ("name: value [!important];") including any trailing semicolons.
     *
     * @return Rule
     */
    private function parseRule() {
        $oRule = new Rule($this->parseIdentifier());
        $this->consumeWhiteSpace();
        $this->consume(':');
        $oValue = $this->parseValue(self::listDelimiterForRule($oRule->getRule()));
        $oRule->setValue($oValue);
        if ($this->comes('!')) {
            $this->consume('!');
            $this->consumeWhiteSpace();
            $this->consume('important');
            $oRule->setIsImportant(true);
        }
        while ($this->comes(';')) {
            $this->consume(';');
            $this->consumeWhiteSpace();
        }
        return $oRule;
    }

    /**
     * Parses a value up to (not including) the next "}", ";", "!" or ")".
     *
     * Primitive values and the delimiters between them are first collected on a flat stack
     * (value, delimiter, value, ...). The stack is then folded into nested RuleValueList objects,
     * one delimiter at a time, in the order given by $aListDelimiters.
     *
     * @param array $aListDelimiters the delimiters to recognise; earlier entries are folded first
     *
     * @return mixed a single primitive value, a RuleValueList, or null when there is no value at all
     */
    private function parseValue($aListDelimiters) {
        $aStack = array();
        $this->consumeWhiteSpace();
        // Build a list of delimiters and parsed values
        while (!($this->comes('}') || $this->comes(';') || $this->comes('!') || $this->comes(')'))) {
            if (count($aStack) > 0) {
                $bFoundDelimiter = false;
                foreach ($aListDelimiters as $sDelimiter) {
                    if ($this->comes($sDelimiter)) {
                        array_push($aStack, $this->consume($sDelimiter));
                        $this->consumeWhiteSpace();
                        $bFoundDelimiter = true;
                        break;
                    }
                }
                if (!$bFoundDelimiter) {
                    // Whitespace was the list delimiter
                    array_push($aStack, ' ');
                }
            }
            array_push($aStack, $this->parsePrimitiveValue());
            $this->consumeWhiteSpace();
        }
        if (count($aStack) === 0) {
            // Nothing was parsed (e.g. "prop: ;"); avoid reading an undefined offset below.
            return null;
        }
        // Convert the list to list objects
        foreach ($aListDelimiters as $sDelimiter) {
            if (count($aStack) === 1) {
                return $aStack[0];
            }
            $iStartPosition = null;
            while (($iStartPosition = array_search($sDelimiter, $aStack, true)) !== false) {
                $iLength = 2; // Number of elements to be joined
                for ($i = $iStartPosition + 2; $i < count($aStack); $i += 2, ++$iLength) {
                    if ($sDelimiter !== $aStack[$i]) {
                        break;
                    }
                }
                $oList = new RuleValueList($sDelimiter);
                for ($i = $iStartPosition - 1; $i - $iStartPosition + 1 < $iLength * 2; $i += 2) {
                    $oList->addListComponent($aStack[$i]);
                }
                // Replace the joined run (values and the delimiters between them) with the new list.
                array_splice($aStack, $iStartPosition - 1, $iLength * 2 - 1, array($oList));
            }
        }
        return $aStack[0];
    }

    /**
     * Returns the list delimiters for a property, in folding order.
     * Properties named "font" or starting with "font-" fold "/" before " ".
     *
     * @param string $sRule the property name
     *
     * @return array
     */
    private static function listDelimiterForRule($sRule) {
        if (preg_match('/^font($|-)/', $sRule)) {
            return array(',', '/', ' ');
        }
        return array(',', ' ', '/');
    }

    /**
     * Parses a single value: a number, a color, a URL, a quoted string, or else an identifier/function.
     *
     * @return mixed the parsed value object, or a plain string for a bare identifier
     */
    private function parsePrimitiveValue() {
        $oValue = null;
        $this->consumeWhiteSpace();
        if (is_numeric($this->peek()) || ($this->comes('-.') && is_numeric($this->peek(1, 2))) || (($this->comes('-') || $this->comes('.')) && is_numeric($this->peek(1, 1)))) {
            $oValue = $this->parseNumericValue();
        } elseif ($this->comes('#') || $this->comes('rgb', true) || $this->comes('hsl', true)) {
            $oValue = $this->parseColorValue();
        } elseif ($this->comes('url', true)) {
            $oValue = $this->parseURLValue();
        } elseif ($this->comes("'") || $this->comes('"')) {
            $oValue = $this->parseStringValue();
        } else {
            $oValue = $this->parseIdentifier(true, false);
        }
        $this->consumeWhiteSpace();
        return $oValue;
    }

    /**
     * Parses a number with an optional leading "-" and an optional unit.
     *
     * @param bool $bForColor passed through to Size
     *
     * @return Size
     */
    private function parseNumericValue($bForColor = false) {
        $sSize = '';
        if ($this->comes('-')) {
            $sSize .= $this->consume('-');
        }
        while (is_numeric($this->peek()) || $this->comes('.')) {
            if ($this->comes('.')) {
                $sSize .= $this->consume('.');
            } else {
                $sSize .= $this->consume(1);
            }
        }

        $sUnit = null;
        // $aSizeUnits is ordered longest unit first, so the first hit is the longest matching unit.
        foreach ($this->aSizeUnits as $iLength => $aValues) {
            $sKey = strtolower($this->peek($iLength));
            if (isset($aValues[$sKey])) {
                $sUnit = $aValues[$sKey];
                $this->consume($iLength);
                break;
            }
        }
        return new Size(floatval($sSize), $sUnit, $bForColor);
    }

    /**
     * Parses a color: either hex notation ("#abc", "#aabbcc") or functional notation, where every
     * letter of the function name (e.g. r, g, b) names one comma-separated numeric component.
     *
     * @return Color
     */
    private function parseColorValue() {
        $aColor = array();
        if ($this->comes('#')) {
            $this->consume('#');
            $sValue = $this->parseIdentifier(false);
            if ($this->strlen($sValue) === 3) {
                // Expand the short form by doubling every digit.
                $sValue = $sValue[0] . $sValue[0] . $sValue[1] . $sValue[1] . $sValue[2] . $sValue[2];
            }
            // NOTE(review): lengths other than 3 and 6 are not validated before the offsets below are read.
            $aColor = array(
                'r' => new Size(intval($sValue[0] . $sValue[1], 16), null, true),
                'g' => new Size(intval($sValue[2] . $sValue[3], 16), null, true),
                'b' => new Size(intval($sValue[4] . $sValue[5], 16), null, true)
            );
        } else {
            $sColorMode = $this->parseIdentifier(false);
            $this->consumeWhiteSpace();
            $this->consume('(');
            $iLength = $this->strlen($sColorMode);
            for ($i = 0; $i < $iLength; ++$i) {
                $this->consumeWhiteSpace();
                $aColor[$sColorMode[$i]] = $this->parseNumericValue(true);
                $this->consumeWhiteSpace();
                if ($i < ($iLength - 1)) {
                    $this->consume(',');
                }
            }
            $this->consume(')');
        }
        return new Color($aColor);
    }

    /**
     * Parses a URL, either wrapped in url(...) or given as a bare string.
     *
     * @return URL
     */
    private function parseURLValue() {
        $bUseUrl = $this->comes('url', true);
        if ($bUseUrl) {
            $this->consume('url');
            $this->consumeWhiteSpace();
            $this->consume('(');
        }
        $this->consumeWhiteSpace();
        $oResult = new URL($this->parseStringValue());
        if ($bUseUrl) {
            $this->consumeWhiteSpace();
            $this->consume(')');
        }
        return $oResult;
    }

    /**
     * Tests an identifier for a given value. Since identifiers are all keywords, they can be
     * vendor-prefixed. We need to check for these versions too.
     *
     * @param string $sIdentifier the identifier found in the input
     * @param string $sMatch the unprefixed keyword to compare against (case-insensitively)
     *
     * @return bool
     */
    private function identifierIs($sIdentifier, $sMatch) {
        if (strcasecmp($sIdentifier, $sMatch) === 0) {
            return true;
        }
        // preg_quote() keeps regex metacharacters in $sMatch from being interpreted as pattern syntax.
        return preg_match('/^(-\\w+-)?' . preg_quote($sMatch, '/') . '$/i', $sIdentifier) === 1;
    }

    /**
     * Tells whether the input at the current position starts with $sString, without consuming anything.
     *
     * @param string $sString
     * @param bool $bCaseInsensitive
     *
     * @return bool
     */
    private function comes($sString, $bCaseInsensitive = false) {
        $sPeek = $this->peek(strlen($sString));
        if ($sPeek === '') {
            return false;
        }
        return $this->streql($sPeek, $sString, $bCaseInsensitive);
    }

    /**
     * Returns up to $iLength characters starting $iOffset characters after the current position,
     * without consuming them. Returns '' when that start lies at or beyond the end of the input.
     *
     * @param int $iLength
     * @param int $iOffset
     *
     * @return string
     */
    private function peek($iLength = 1, $iOffset = 0) {
        $iOffset += $this->iCurrentPosition;
        if ($iOffset >= $this->iLength) {
            return '';
        }
        return $this->substr($iOffset, $iLength);
    }

    /**
     * Consumes either a literal string or a number of characters and advances the cursor.
     *
     * @param string|int $mValue a literal that must come next (compared through streql() with its
     *                           default, case-insensitive setting), or the number of characters to take
     *
     * @return string the literal as passed in, or the characters taken
     *
     * @throws UnexpectedTokenException when the literal does not come next or too few characters are left
     */
    private function consume($mValue = 1) {
        if (is_string($mValue)) {
            $iLength = $this->strlen($mValue);
            if (!$this->streql($this->substr($this->iCurrentPosition, $iLength), $mValue)) {
                throw new UnexpectedTokenException($mValue, $this->peek(max($iLength, 5)));
            }
            $this->iCurrentPosition += $iLength;
            return $mValue;
        }
        if ($this->iCurrentPosition + $mValue > $this->iLength) {
            throw new UnexpectedTokenException($mValue, $this->peek(5), 'count');
        }
        $sResult = $this->substr($this->iCurrentPosition, $mValue);
        $this->iCurrentPosition += $mValue;
        return $sResult;
    }

    /**
     * Consumes the text matched by a regular expression applied to the remaining input.
     *
     * @param string $mExpression a PCRE pattern; the match is consumed as a literal, so the pattern
     *                            is expected to be anchored at the start of the remaining input
     *
     * @return string the matched text
     *
     * @throws UnexpectedTokenException when the pattern does not match
     */
    private function consumeExpression($mExpression) {
        $aMatches = null;
        if (preg_match($mExpression, $this->inputLeft(), $aMatches, PREG_OFFSET_CAPTURE) === 1) {
            return $this->consume($aMatches[0][0]);
        }
        throw new UnexpectedTokenException($mExpression, $this->peek(5), 'expression');
    }

    /**
     * Skips any run of whitespace and comments.
     *
     * In lenient mode an unterminated comment moves the cursor to the end of the input
     * instead of raising an exception.
     */
    private function consumeWhiteSpace() {
        do {
            while (preg_match('/\\s/isSu', $this->peek()) === 1) {
                $this->consume(1);
            }
            if ($this->oParserSettings->bLenientParsing) {
                try {
                    $bHasComment = $this->consumeComment();
                } catch (UnexpectedTokenException $e) {
                    // When we can't find the end of a comment, we assume the document is finished.
                    $this->iCurrentPosition = $this->iLength;
                    return;
                }
            } else {
                $bHasComment = $this->consumeComment();
            }
        } while ($bHasComment);
    }

    /**
     * Consumes one comment if one starts at the current position.
     *
     * @return bool true when a comment was consumed
     *
     * @throws UnexpectedTokenException (raised by consume()) when the comment is not closed before the input ends
     */
    private function consumeComment() {
        if ($this->comes('/*')) {
            $this->consume(1);
            while ($this->consume(1) !== '') {
                if ($this->comes('*/')) {
                    $this->consume(2);
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * @return bool whether the cursor has reached the end of the input
     */
    private function isEnd() {
        return $this->iCurrentPosition >= $this->iLength;
    }

    /**
     * Consumes characters until one of the end characters is found and returns what was consumed.
     *
     * @param string|array $aEnd the end character, or a list of end characters
     * @param bool $bIncludeEnd when true the end character is appended to the result (and stays consumed)
     * @param bool $consumeEnd when $bIncludeEnd is false: whether the end character stays consumed (true)
     *                         or the cursor is moved back in front of it (false)
     *
     * @return string
     *
     * @throws UnexpectedTokenException when the input ends before an end character is found
     */
    private function consumeUntil($aEnd, $bIncludeEnd = false, $consumeEnd = false) {
        $aEnd = is_array($aEnd) ? $aEnd : array($aEnd);
        $out = '';
        $start = $this->iCurrentPosition;

        // consume(1) throws at the end of the input rather than returning '', so in practice this
        // loop is left through the return below or through that exception. In the latter case the
        // cursor stays where it was; the lenient loop in parseList() only terminates because of
        // that, so do not "fix" this without changing the callers as well.
        while (($char = $this->consume(1)) !== '') {
            $this->consumeComment();
            if (in_array($char, $aEnd, true)) {
                if ($bIncludeEnd) {
                    $out .= $char;
                } elseif (!$consumeEnd) {
                    $this->iCurrentPosition -= $this->strlen($char);
                }
                return $out;
            }
            $out .= $char;
        }

        $this->iCurrentPosition = $start;
        throw new UnexpectedTokenException('One of ("'.implode('","', $aEnd).'")', $this->peek(5), 'search');
    }

    /**
     * @return string everything from the current position to the end of the input
     */
    private function inputLeft() {
        // An explicit length is used on purpose: a length of -1 would leave out the last character.
        return $this->substr($this->iCurrentPosition, $this->iLength - $this->iCurrentPosition);
    }

    /**
     * Character-array counterpart of substr().
     *
     * @param int $iStart index of the first character
     * @param int $iLength number of characters; a negative value leaves out that many characters at the end
     *
     * @return string the requested characters, cut off at the end of the input ('' if none)
     */
    private function substr($iStart, $iLength) {
        if ($iLength < 0) {
            $iLength = $this->iLength - $iStart + $iLength;
        }
        if ($iStart + $iLength > $this->iLength) {
            $iLength = $this->iLength - $iStart;
        }
        if ($iLength <= 0) {
            return '';
        }
        return implode('', array_slice($this->aText, $iStart, $iLength));
    }

    /**
     * strlen() that honours the multibyte setting and the current charset.
     */
    private function strlen($sString) {
        if ($this->oParserSettings->bMultibyteSupport) {
            return mb_strlen($sString, $this->sCharset);
        } else {
            return strlen($sString);
        }
    }

    /**
     * Compares two strings, case-insensitively unless told otherwise.
     *
     * @return bool
     */
    private function streql($sString1, $sString2, $bCaseInsensitive = true) {
        if ($bCaseInsensitive) {
            return $this->strtolower($sString1) === $this->strtolower($sString2);
        } else {
            return $sString1 === $sString2;
        }
    }

    /**
     * strtolower() that honours the multibyte setting and the current charset.
     */
    private function strtolower($sString) {
        if ($this->oParserSettings->bMultibyteSupport) {
            return mb_strtolower($sString, $this->sCharset);
        } else {
            return strtolower($sString);
        }
    }

    /**
     * strpos() that honours the multibyte setting and the current charset.
     */
    private function strpos($sString, $sNeedle, $iOffset) {
        if ($this->oParserSettings->bMultibyteSupport) {
            return mb_strpos($sString, $sNeedle, $iOffset, $this->sCharset);
        } else {
            return strpos($sString, $sNeedle, $iOffset);
        }
    }

}