<?php

namespace Pelago\Emogrifier;

use Pelago\Emogrifier\HtmlProcessor\AbstractHtmlProcessor;
use Pelago\Emogrifier\Utilities\CssConcatenator;
use Symfony\Component\CssSelector\CssSelectorConverter;
use Symfony\Component\CssSelector\Exception\SyntaxErrorException;

/**
 * This class provides functions for converting CSS styles into inline style attributes in your HTML code.
 *
 * For Emogrifier 3.0.0, this will be the successor to the \Pelago\Emogrifier class (which then will be deprecated).
 *
 * For more information, please see the README.md file.
 *
 * @author Cameron Brooks
 * @author Jaime Prado
 * @author Oliver Klee <github@oliverklee.de>
 * @author Roman Ožana <ozana@omdesign.cz>
 * @author Sander Kruger <s.kruger@invessel.com>
 * @author Zoli Szabó <zoli.szabo+github@gmail.com>
 */
class CssInliner extends AbstractHtmlProcessor
{
    /**
     * @var int
     */
    const CACHE_KEY_CSS = 0;

    /**
     * @var int
     */
    const CACHE_KEY_SELECTOR = 1;

    /**
     * @var int
     */
    const CACHE_KEY_CSS_DECLARATIONS_BLOCK = 2;

    /**
     * @var int
     */
    const CACHE_KEY_COMBINED_STYLES = 3;

    /**
     * Regular expression component matching a static pseudo class in a selector, without the preceding ":",
     * for which the applicable elements can be determined (by converting the selector to an XPath expression).
     * (Contains alternation without a group and is intended to be placed within a capturing, non-capturing or lookahead
     * group, as appropriate for the usage context.)
     *
     * @var string
     */
    const PSEUDO_CLASS_MATCHER
        = 'empty|(?:first|last|nth(?:-last)?+|only)-child|(?:first|last|nth(?:-last)?+)-of-type|not\\([[:ascii:]]*\\)';

    /**
     * the selectors to exclude from inlining, as array keys (the values are always `true`)
     *
     * @var bool[]
     */
    private $excludedSelectors = [];

    /**
     * the media types whose `@media` rules are kept, as array keys (the values are always `true`)
     *
     * @var bool[]
     */
    private $allowedMediaTypes = ['all' => true, 'screen' => true, 'print' => true];

    /**
     * the per-run caches, keyed by the `CACHE_KEY_*` constants
     *
     * @var mixed[]
     */
    private $caches = [
        self::CACHE_KEY_CSS => [],
        self::CACHE_KEY_SELECTOR => [],
        self::CACHE_KEY_CSS_DECLARATIONS_BLOCK => [],
        self::CACHE_KEY_COMBINED_STYLES => [],
    ];

    /**
     * lazily created by `getCssSelectorConverter()`
     *
     * @var CssSelectorConverter
     */
    private $cssSelectorConverter = null;

    /**
     * the visited nodes with the XPath paths as array keys
     *
     * @var \DOMElement[]
     */
    private $visitedNodes = [];

    /**
     * the styles to apply to the nodes with the XPath paths as array keys for the outer array
     * and the attribute names/values as key/value pairs for the inner array
     *
     * @var string[][]
     */
    private $styleAttributesForNodes = [];

    /**
     * Determines whether the "style" attributes of tags in the HTML passed to this class should be preserved.
     * If set to false, the value of the style attributes will be discarded.
     *
     * @var bool
     */
    private $isInlineStyleAttributesParsingEnabled = true;

    /**
     * Determines whether the <style> blocks in the HTML passed to this class should be parsed.
     *
     * If set to true, the <style> blocks will be removed from the HTML and their contents will be applied to the HTML
     * via inline styles.
     *
     * If set to false, the <style> blocks will be left as they are in the HTML.
     *
     * @var bool
     */
    private $isStyleBlocksParsingEnabled = true;

    /**
     * For calculating selector precedence order.
     * Keys are a regular expression part to match before a CSS name.
     * Values are a multiplier factor per match to weight specificity.
     *
     * @var int[]
     */
    private $selectorPrecedenceMatchers = [
        // IDs: worth 10000
        '\\#' => 10000,
        // classes, attributes, pseudo-classes (not pseudo-elements) except `:not`: worth 100
        '(?:\\.|\\[|(?<!:):(?!not\\())' => 100,
        // elements (not attribute values or `:not`), pseudo-elements: worth 1
        '(?:(?<![="\':\\w\\-])|::)' => 1,
    ];

    /**
     * array of data describing CSS rules which apply to the document but cannot be inlined, in the format returned by
     * `parseCssRules`
     *
     * @var string[][]
     */
    private $matchingUninlinableCssRules = null;

    /**
     * Emogrifier will throw Exceptions when it encounters an error instead of silently ignoring them.
     *
     * @var bool
     */
    private $debug = false;

    /**
     * Inlines the given CSS into the existing HTML.
     *
     * @param string $css the CSS to inline, must be UTF-8-encoded
     *
     * @return self fluent interface
     *
     * @throws SyntaxErrorException
     */
    public function inlineCss($css = '')
    {
        $this->clearAllCaches();
        $this->purgeVisitedNodes();

        $this->normalizeStyleAttributesOfAllNodes();

        $combinedCss = $css;
        // grab any existing style blocks from the HTML and append them to the existing CSS
        // (these blocks should be appended so as to have precedence over conflicting styles in the existing CSS)
        if ($this->isStyleBlocksParsingEnabled) {
            $combinedCss .= $this->getCssFromAllStyleNodes();
        }

        $cssWithoutComments = $this->removeCssComments($combinedCss);
        list($cssWithoutCommentsCharsetOrImport, $cssImportRules)
            = $this->extractImportAndCharsetRules($cssWithoutComments);

        $excludedNodes = $this->getNodesToExclude();
        $cssRules = $this->parseCssRules($cssWithoutCommentsCharsetOrImport);
        $cssSelectorConverter = $this->getCssSelectorConverter();
        foreach ($cssRules['inlinable'] as $cssRule) {
            try {
                $nodesMatchingCssSelectors = $this->xPath->query($cssSelectorConverter->toXPath($cssRule['selector']));
            } catch (SyntaxErrorException $e) {
                if ($this->debug) {
                    throw $e;
                }
                continue;
            }

            // `DOMXPath::query()` returns false for an expression it cannot evaluate; skip the rule in that case
            // (same check as in `existsMatchForCssSelector()`)
            if ($nodesMatchingCssSelectors === false) {
                continue;
            }

            /** @var \DOMElement $node */
            foreach ($nodesMatchingCssSelectors as $node) {
                if (\in_array($node, $excludedNodes, true)) {
                    continue;
                }
                $this->copyInlinableCssToStyleAttribute($node, $cssRule);
            }
        }

        if ($this->isInlineStyleAttributesParsingEnabled) {
            $this->fillStyleAttributesWithMergedStyles();
        }

        $this->removeImportantAnnotationFromAllInlineStyles();

        $this->determineMatchingUninlinableCssRules($cssRules['uninlinable']);
        $this->copyUninlinableCssToStyleNode($cssImportRules);

        return $this;
    }

    /**
     * Disables the parsing of inline styles.
     *
     * @return void
     */
    public function disableInlineStyleAttributesParsing()
    {
        $this->isInlineStyleAttributesParsingEnabled = false;
    }

    /**
     * Disables the parsing of <style> blocks.
     *
     * @return void
     */
    public function disableStyleBlocksParsing()
    {
        $this->isStyleBlocksParsingEnabled = false;
    }

    /**
     * Marks a media query type to keep.
     *
     * @param string $mediaName the media type name, e.g., "braille"
     *
     * @return void
     */
    public function addAllowedMediaType($mediaName)
    {
        $this->allowedMediaTypes[$mediaName] = true;
    }

    /**
     * Drops a media query type from the allowed list.
     *
     * @param string $mediaName the media type name, e.g., "braille"
     *
     * @return void
     */
    public function removeAllowedMediaType($mediaName)
    {
        if (isset($this->allowedMediaTypes[$mediaName])) {
            unset($this->allowedMediaTypes[$mediaName]);
        }
    }

    /**
     * Adds a selector to exclude nodes from emogrification.
     *
     * Any nodes that match the selector will not have their style altered.
     *
     * @param string $selector the selector to exclude, e.g., ".editor"
     *
     * @return void
     */
    public function addExcludedSelector($selector)
    {
        $this->excludedSelectors[$selector] = true;
    }

    /**
     * No longer excludes the nodes matching this selector from emogrification.
     *
     * @param string $selector the selector to no longer exclude, e.g., ".editor"
     *
     * @return void
     */
    public function removeExcludedSelector($selector)
    {
        if (isset($this->excludedSelectors[$selector])) {
            unset($this->excludedSelectors[$selector]);
        }
    }

    /**
     * Sets the debug mode.
     *
     * @param bool $debug set to true to enable debug mode
     *
     * @return void
     */
    public function setDebug($debug)
    {
        $this->debug = $debug;
    }

    /**
     * Gets the array of selectors present in the CSS provided to `inlineCss()` for which the declarations could not be
     * applied as inline styles, but which may affect elements in the HTML. The relevant CSS will have been placed in a
     * `<style>` element. The selectors may include those used within `@media` rules or those involving dynamic
     * pseudo-classes (such as `:hover`) or pseudo-elements (such as `::after`).
     *
     * @return string[]
     *
     * @throws \BadMethodCallException if `inlineCss` has not been called first
     */
    public function getMatchingUninlinableSelectors()
    {
        if ($this->matchingUninlinableCssRules === null) {
            throw new \BadMethodCallException('inlineCss must be called first', 1568385221);
        }

        return \array_column($this->matchingUninlinableCssRules, 'selector');
    }

    /**
     * Clears all caches.
     *
     * @return void
     */
    private function clearAllCaches()
    {
        $this->caches = [
            self::CACHE_KEY_CSS => [],
            self::CACHE_KEY_SELECTOR => [],
            self::CACHE_KEY_CSS_DECLARATIONS_BLOCK => [],
            self::CACHE_KEY_COMBINED_STYLES => [],
        ];
    }

    /**
     * Purges the visited nodes.
     *
     * @return void
     */
    private function purgeVisitedNodes()
    {
        $this->visitedNodes = [];
        $this->styleAttributesForNodes = [];
    }

    /**
     * Parses the document and normalizes all existing CSS attributes.
     * This changes 'DISPLAY: none' to 'display: none'.
     * We wouldn't have to do this if DOMXPath supported XPath 2.0.
     * Also stores a reference of nodes with existing inline styles so we don't overwrite them.
     *
     * @return void
     */
    private function normalizeStyleAttributesOfAllNodes()
    {
        /** @var \DOMElement $node */
        foreach ($this->getAllNodesWithStyleAttribute() as $node) {
            if ($this->isInlineStyleAttributesParsingEnabled) {
                $this->normalizeStyleAttributes($node);
            }
            // Remove style attribute in every case, so we can add them back (if inline style attributes
            // parsing is enabled) to the end of the style list, thus keeping the right priority of CSS rules;
            // else original inline style rules may remain at the beginning of the final inline style definition
            // of a node, which may give not the desired results
            $node->removeAttribute('style');
        }
    }

    /**
     * Returns a list with all DOM nodes that have a style attribute.
     *
     * @return \DOMNodeList
     */
    private function getAllNodesWithStyleAttribute()
    {
        return $this->xPath->query('//*[@style]');
    }

    /**
     * Normalizes the value of the "style" attribute and saves it.
     *
     * @param \DOMElement $node
     *
     * @return void
     */
    private function normalizeStyleAttributes(\DOMElement $node)
    {
        // lowercase every identifier that is directly followed by a colon (i.e., the property names)
        $normalizedOriginalStyle = \preg_replace_callback(
            '/-?+[_a-zA-Z][\\w\\-]*+(?=:)/S',
            static function (array $m) {
                return \strtolower($m[0]);
            },
            $node->getAttribute('style')
        );

        // in order to not overwrite existing style attributes in the HTML, we
        // have to save the original HTML styles
        $nodePath = $node->getNodePath();
        if (!isset($this->styleAttributesForNodes[$nodePath])) {
            $this->styleAttributesForNodes[$nodePath] = $this->parseCssDeclarationsBlock($normalizedOriginalStyle);
            $this->visitedNodes[$nodePath] = $node;
        }

        $node->setAttribute('style', $normalizedOriginalStyle);
    }

    /**
     * Parses a CSS declaration block into property name/value pairs.
     *
     * Example:
     *
     * The declaration block
     *
     *   "color: #000; font-weight: bold;"
     *
     * will be parsed into the following array:
     *
     *   "color" => "#000"
     *   "font-weight" => "bold"
     *
     * @param string $cssDeclarationsBlock the CSS declarations block without the curly braces, may be empty
     *
     * @return string[]
     *         the CSS declarations with the property names as array keys and the property values as array values
     */
    private function parseCssDeclarationsBlock($cssDeclarationsBlock)
    {
        if (isset($this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock])) {
            return $this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock];
        }

        $properties = [];
        // do not split at the semicolons within data URIs (";base64", ";charset")
        foreach (\preg_split('/;(?!base64|charset)/', $cssDeclarationsBlock) as $declaration) {
            $matches = [];
            if (!\preg_match('/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/s', \trim($declaration), $matches)) {
                continue;
            }

            $propertyName = \strtolower($matches[1]);
            $propertyValue = $matches[2];
            $properties[$propertyName] = $propertyValue;
        }
        $this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock] = $properties;

        return $properties;
    }

    /**
     * Returns the concatenated CSS content of all `<style>` nodes in the document and removes those nodes from it.
     *
     * @return string
     */
    private function getCssFromAllStyleNodes()
    {
        $styleNodes = $this->xPath->query('//style');
        if ($styleNodes === false) {
            return '';
        }

        $css = '';
        /** @var \DOMNode $styleNode */
        foreach ($styleNodes as $styleNode) {
            $css .= "\n\n" . $styleNode->nodeValue;
            $styleNode->parentNode->removeChild($styleNode);
        }

        return $css;
    }

    /**
     * Removes comments from the supplied CSS.
     *
     * @param string $css
     *
     * @return string CSS with the comments removed
     */
    private function removeCssComments($css)
    {
        return \preg_replace('%/\\*[^*]*+(?:\\*(?!/)[^*]*+)*+\\*/%', '', $css);
    }

    /**
     * Extracts `@import` and `@charset` rules from the supplied CSS.  These rules must not be preceded by any other
     * rules, or they will be ignored.  (From the CSS 2.1 specification: "CSS 2.1 user agents must ignore any '@import'
     * rule that occurs inside a block or after any non-ignored statement other than an @charset or an @import rule."
     * Note also that `@charset` is case sensitive whereas `@import` is not.)
     *
     * @param string $css CSS with comments removed
     *
     * @return string[] The first element is the CSS with the valid `@import` and `@charset` rules removed.  The second
     * element contains a concatenation of the valid `@import` rules, each followed by whatever whitespace followed it
     * in the original CSS (so that either unminified or minified formatting is preserved); if there were no `@import`
     * rules, it will be an empty string.  The (valid) `@charset` rules are discarded.
     */
    private function extractImportAndCharsetRules($css)
    {
        $possiblyModifiedCss = $css;
        $importRules = '';

        // repeatedly strip one leading at-rule until the CSS no longer starts with `@import` or `@charset`
        while (
            \preg_match(
                '/^\\s*+(@((?i)import(?-i)|charset)\\s[^;]++;\\s*+)/',
                $possiblyModifiedCss,
                $matches
            )
        ) {
            list($fullMatch, $atRuleAndFollowingWhitespace, $atRuleName) = $matches;

            if (\strtolower($atRuleName) === 'import') {
                $importRules .= $atRuleAndFollowingWhitespace;
            }

            $possiblyModifiedCss = \substr($possiblyModifiedCss, \strlen($fullMatch));
        }

        return [$possiblyModifiedCss, $importRules];
    }

    /**
     * Find the nodes that are not to be emogrified.
     *
     * @return \DOMElement[]
     *
     * @throws SyntaxErrorException
     */
    private function getNodesToExclude()
    {
        $excludedNodes = [];
        foreach (\array_keys($this->excludedSelectors) as $selectorToExclude) {
            try {
                $matchingNodes = $this->xPath->query($this->getCssSelectorConverter()->toXPath($selectorToExclude));
            } catch (SyntaxErrorException $e) {
                if ($this->debug) {
                    throw $e;
                }
                continue;
            }

            // `DOMXPath::query()` returns false for an expression it cannot evaluate; nothing to exclude then
            if ($matchingNodes === false) {
                continue;
            }

            foreach ($matchingNodes as $node) {
                $excludedNodes[] = $node;
            }
        }

        return $excludedNodes;
    }

    /**
     * Returns the CSS selector converter, creating it on first use.
     *
     * @return CssSelectorConverter
     */
    private function getCssSelectorConverter()
    {
        if ($this->cssSelectorConverter === null) {
            $this->cssSelectorConverter = new CssSelectorConverter();
        }

        return $this->cssSelectorConverter;
    }

    /**
     * Extracts and parses the individual rules from a CSS string.
     *
     * @param string $css a string of raw CSS code with comments removed
     *
     * @return string[][][] A 2-entry array with the key "inlinable" containing rules which can be inlined as `style`
     *         attributes and the key "uninlinable" containing rules which cannot.  Each value is an array of string
     *         sub-arrays with the keys
     *         "media" (the media query string, e.g. "@media screen and (max-width: 480px)",
     *         or an empty string if not from a `@media` rule),
     *         "selector" (the CSS selector, e.g., "*" or "header h1"),
     *         "hasUnmatchablePseudo" (true if that selector contains pseudo-elements or dynamic pseudo-classes
     *         such that the declarations cannot be applied inline),
     *         "declarationsBlock" (the semicolon-separated CSS declarations for that selector,
     *         e.g., "color: red; height: 4px;"),
     *         and "line" (the line number e.g. 42)
     */
    private function parseCssRules($css)
    {
        $cssKey = \md5($css);
        if (isset($this->caches[self::CACHE_KEY_CSS][$cssKey])) {
            return $this->caches[self::CACHE_KEY_CSS][$cssKey];
        }

        $matches = $this->getCssRuleMatches($css);

        $cssRules = [
            'inlinable' => [],
            'uninlinable' => [],
        ];
        /** @var string[][] $matches */
        /** @var string[] $cssRule */
        foreach ($matches as $key => $cssRule) {
            $cssDeclaration = \trim($cssRule['declarations']);
            if ($cssDeclaration === '') {
                continue;
            }

            foreach (\explode(',', $cssRule['selectors']) as $selector) {
                // don't process pseudo-elements and behavioral (dynamic) pseudo-classes;
                // only allow structural pseudo-classes
                $hasPseudoElement = \strpos($selector, '::') !== false;
                $hasUnsupportedPseudoClass = (bool)\preg_match(
                    '/:(?!' . self::PSEUDO_CLASS_MATCHER . ')[\\w\\-]/i',
                    $selector
                );
                $hasUnmatchablePseudo = $hasPseudoElement || $hasUnsupportedPseudoClass;

                $parsedCssRule = [
                    'media' => $cssRule['media'],
                    'selector' => \trim($selector),
                    'hasUnmatchablePseudo' => $hasUnmatchablePseudo,
                    'declarationsBlock' => $cssDeclaration,
                    // keep track of where it appears in the file, since order is important
                    'line' => $key,
                ];
                $ruleType = ($cssRule['media'] === '' && !$hasUnmatchablePseudo) ? 'inlinable' : 'uninlinable';
                $cssRules[$ruleType][] = $parsedCssRule;
            }
        }

        \usort($cssRules['inlinable'], [$this, 'sortBySelectorPrecedence']);

        $this->caches[self::CACHE_KEY_CSS][$cssKey] = $cssRules;

        return $cssRules;
    }

    /**
     * Comparison callback for `usort`: orders parsed CSS rules by ascending selector precedence, and by ascending
     * "line" for rules whose selectors have the same precedence.
     *
     * @param string[] $a
     * @param string[] $b
     *
     * @return int
     */
    private function sortBySelectorPrecedence(array $a, array $b)
    {
        $precedenceA = $this->getCssSelectorPrecedence($a['selector']);
        $precedenceB = $this->getCssSelectorPrecedence($b['selector']);

        // We want these sorted in ascending order so selectors with lesser precedence get processed first and
        // selectors with greater precedence get sorted last.
        $precedenceForEquals = ($a['line'] < $b['line'] ? -1 : 1);
        $precedenceForNotEquals = ($precedenceA < $precedenceB ? -1 : 1);
        return ($precedenceA === $precedenceB) ? $precedenceForEquals : $precedenceForNotEquals;
    }

    /**
     * Calculates the precedence (specificity weight) of a selector using `$this->selectorPrecedenceMatchers`.
     *
     * @param string $selector
     *
     * @return int
     */
    private function getCssSelectorPrecedence($selector)
    {
        $selectorKey = \md5($selector);
        if (isset($this->caches[self::CACHE_KEY_SELECTOR][$selectorKey])) {
            return $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey];
        }

        $precedence = 0;
        foreach ($this->selectorPrecedenceMatchers as $matcher => $value) {
            if (\trim($selector) === '') {
                break;
            }
            $number = 0;
            // remove the matched components so that they are not counted again by a later (lower-valued) matcher
            $selector = \preg_replace('/' . $matcher . '\\w+/', '', $selector, -1, $number);
            $precedence += ($value * $number);
        }
        $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey] = $precedence;

        return $precedence;
    }

    /**
     * Parses a string of CSS into the media query, selectors and declarations for each ruleset in order.
     *
     * @param string $css CSS with comments removed
     *
     * @return string[][] Array of string sub-arrays with the keys
     *         "media" (the media query string, e.g. "@media screen and (max-width: 480px)",
     *         or an empty string if not from an `@media` rule),
     *         "selectors" (the CSS selector(s), e.g., "*" or "h1, h2"),
     *         "declarations" (the semicolon-separated CSS declarations for that/those selector(s),
     *         e.g., "color: red; height: 4px;"),
     */
    private function getCssRuleMatches($css)
    {
        $splitCss = $this->splitCssAndMediaQuery($css);

        $ruleMatches = [];
        foreach ($splitCss as $cssPart) {
            // process each part for selectors and definitions
            \preg_match_all('/(?:^|[\\s^{}]*)([^{]+){([^}]*)}/mi', $cssPart['css'], $matches, PREG_SET_ORDER);

            /** @var string[][] $matches */
            foreach ($matches as $cssRule) {
                $ruleMatches[] = [
                    'media' => $cssPart['media'],
                    'selectors' => $cssRule[1],
                    'declarations' => $cssRule[2],
                ];
            }
        }

        return $ruleMatches;
    }

    /**
     * Splits input CSS code into an array of parts for different media queries, in order.
     * Each part is an array where:
     *
     * - key "css" will contain clean CSS code (for @media rules this will be the group rule body within "{...}")
     * - key "media" will contain "@media " followed by the media query list, for all allowed media queries,
     *   or an empty string for CSS not within a media query
     *
     * Example:
     *
     * The CSS code
     *
     *   "@import "file.css"; h1 { color:red; } @media { h1 {}} @media tv { h1 {}}"
     *
     * will be parsed into the following array:
     *
     *   0 => [
     *     "css" => "h1 { color:red; }",
     *     "media" => ""
     *   ],
     *   1 => [
     *     "css" => " h1 {}",
     *     "media" => "@media "
     *   ]
     *
     * @param string $css
     *
     * @return string[][]
     */
    private function splitCssAndMediaQuery($css)
    {
        $mediaTypesExpression = '';
        if (!empty($this->allowedMediaTypes)) {
            // The media type names are caller-supplied (via `addAllowedMediaType()`), so they must be quoted
            // before they are embedded in the "#"-delimited pattern below.
            $quotedMediaTypes = \array_map(
                static function ($mediaType) {
                    return \preg_quote((string)$mediaType, '#');
                },
                \array_keys($this->allowedMediaTypes)
            );
            $mediaTypesExpression = '|' . \implode('|', $quotedMediaTypes);
        }

        $mediaRuleBodyMatcher = '[^{]*+{(?:[^{}]*+{.*})?\\s*+}\\s*+';

        // With PREG_SPLIT_DELIM_CAPTURE, the result alternates between the CSS outside of the allowed @media rules
        // (even indexes) and the captured allowed @media rules (odd indexes).
        $cssSplitForAllowedMediaTypes = \preg_split(
            '#(@media\\s++(?:only\\s++)?+(?:(?=[{(])' . $mediaTypesExpression . ')' . $mediaRuleBodyMatcher
            . ')#misU',
            $css,
            -1,
            PREG_SPLIT_DELIM_CAPTURE
        );

        // filter the CSS outside/between allowed @media rules
        $cssCleaningMatchers = [
            'import/charset directives' => '/\\s*+@(?:import|charset)\\s[^;]++;/i',
            'remaining media enclosures' => '/\\s*+@media\\s' . $mediaRuleBodyMatcher . '/isU',
        ];

        $splitCss = [];
        foreach ($cssSplitForAllowedMediaTypes as $index => $cssPart) {
            $isMediaRule = $index % 2 !== 0;
            if ($isMediaRule) {
                \preg_match('/^([^{]*+){(.*)}[^}]*+$/s', $cssPart, $matches);
                $splitCss[] = [
                    'css' => $matches[2],
                    'media' => $matches[1],
                ];
            } else {
                $cleanedCss = \trim(\preg_replace($cssCleaningMatchers, '', $cssPart));
                if ($cleanedCss !== '') {
                    $splitCss[] = [
                        'css' => $cleanedCss,
                        'media' => '',
                    ];
                }
            }
        }
        return $splitCss;
    }

    /**
     * Copies $cssRule into the style attribute of $node.
     *
     * Note: This method does not check whether $cssRule matches $node.
     *
     * @param \DOMElement $node
     * @param string[][] $cssRule
     *
     * @return void
     */
    private function copyInlinableCssToStyleAttribute(\DOMElement $node, array $cssRule)
    {
        $newStyleDeclarations = $this->parseCssDeclarationsBlock($cssRule['declarationsBlock']);
        if ($newStyleDeclarations === []) {
            return;
        }

        // if it has a style attribute, get it, process it, and append (overwrite) new stuff
        if ($node->hasAttribute('style')) {
            // break it up into an associative array
            $oldStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
        } else {
            $oldStyleDeclarations = [];
        }
        $node->setAttribute(
            'style',
            $this->generateStyleStringFromDeclarationsArrays($oldStyleDeclarations, $newStyleDeclarations)
        );
    }

    /**
     * This method merges old or existing name/value array with new name/value array
     * and then generates a string of the combined style suitable for placing inline.
     * This becomes the single point for CSS string generation allowing for consistent
     * CSS output no matter where the CSS originally came from.
     *
     * @param string[] $oldStyles
     * @param string[] $newStyles
     *
     * @return string
     */
    private function generateStyleStringFromDeclarationsArrays(array $oldStyles, array $newStyles)
    {
        $cacheKey = \serialize([$oldStyles, $newStyles]);
        if (isset($this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey])) {
            return $this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey];
        }

        // Unset the overridden styles to preserve order, important if shorthand and individual properties are mixed
        foreach ($oldStyles as $attributeName => $attributeValue) {
            if (!isset($newStyles[$attributeName])) {
                continue;
            }

            $newAttributeValue = $newStyles[$attributeName];
            if (
                $this->attributeValueIsImportant($attributeValue)
                && !$this->attributeValueIsImportant($newAttributeValue)
            ) {
                // an old `!important` declaration wins over a new one that is not `!important`
                unset($newStyles[$attributeName]);
            } else {
                unset($oldStyles[$attributeName]);
            }
        }

        $combinedStyles = \array_merge($oldStyles, $newStyles);

        $style = '';
        foreach ($combinedStyles as $attributeName => $attributeValue) {
            $style .= \strtolower(\trim($attributeName)) . ': ' . \trim($attributeValue) . '; ';
        }
        $trimmedStyle = \rtrim($style);

        $this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey] = $trimmedStyle;

        return $trimmedStyle;
    }

    /**
     * Checks whether $attributeValue is marked as !important.
     *
     * The check is case-insensitive and ignores surrounding whitespace.
     *
     * @param string $attributeValue
     *
     * @return bool
     */
    private function attributeValueIsImportant($attributeValue)
    {
        return \strtolower(\substr(\trim($attributeValue), -10)) === '!important';
    }

    /**
     * Merges styles from styles attributes and style nodes and applies them to the attribute nodes
     *
     * @return void
     */
    private function fillStyleAttributesWithMergedStyles()
    {
        foreach ($this->styleAttributesForNodes as $nodePath => $styleAttributesForNode) {
            $node = $this->visitedNodes[$nodePath];
            $currentStyleAttributes = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
            $node->setAttribute(
                'style',
                $this->generateStyleStringFromDeclarationsArrays(
                    $currentStyleAttributes,
                    $styleAttributesForNode
                )
            );
        }
    }

    /**
     * Searches for all nodes with a style attribute and removes the "!important" annotations out of
     * the inline style declarations, possibly by rearranging declarations.
     *
     * @return void
     */
    private function removeImportantAnnotationFromAllInlineStyles()
    {
        foreach ($this->getAllNodesWithStyleAttribute() as $node) {
            $this->removeImportantAnnotationFromNodeInlineStyle($node);
        }
    }

    /**
     * Removes the "!important" annotations out of the inline style declarations,
     * possibly by rearranging declarations.
     * Rearranging needed when !important shorthand properties are followed by some of their
     * not !important expanded-version properties.
     * For example "font: 12px serif !important; font-size: 13px;" must be reordered
     * to "font-size: 13px; font: 12px serif;" in order to remain correct.
     *
     * @param \DOMElement $node
     *
     * @return void
     */
    private function removeImportantAnnotationFromNodeInlineStyle(\DOMElement $node)
    {
        $inlineStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
        $regularStyleDeclarations = [];
        $importantStyleDeclarations = [];
        foreach ($inlineStyleDeclarations as $property => $value) {
            if ($this->attributeValueIsImportant($value)) {
                // `attributeValueIsImportant()` matches case-insensitively, so the annotation must be removed
                // case-insensitively as well; otherwise e.g. "!IMPORTANT" would remain in the inline style.
                $importantStyleDeclarations[$property] = \trim(\str_ireplace('!important', '', $value));
            } else {
                $regularStyleDeclarations[$property] = $value;
            }
        }
        // the formerly `!important` declarations go last so that they keep overriding the regular ones
        $inlineStyleDeclarationsInNewOrder = \array_merge(
            $regularStyleDeclarations,
            $importantStyleDeclarations
        );
        $node->setAttribute(
            'style',
            $this->generateStyleStringFromSingleDeclarationsArray($inlineStyleDeclarationsInNewOrder)
        );
    }

    /**
     * Generates a CSS style string suitable to be used inline from the $styleDeclarations property => value array.
     *
     * @param string[] $styleDeclarations
     *
     * @return string
     */
    private function generateStyleStringFromSingleDeclarationsArray(array $styleDeclarations)
    {
        return $this->generateStyleStringFromDeclarationsArrays([], $styleDeclarations);
    }

    /**
     * Determines which of `$cssRules` actually apply to `$this->domDocument`, and sets them in
     * `$this->matchingUninlinableCssRules`.
     *
     * @param string[][] $cssRules the "uninlinable" array of CSS rules returned by `parseCssRules`
     *
     * @return void
     */
    private function determineMatchingUninlinableCssRules(array $cssRules)
    {
        $this->matchingUninlinableCssRules = \array_filter($cssRules, [$this, 'existsMatchForSelectorInCssRule']);
    }

    /**
     * Checks whether there is at least one matching element for the CSS selector contained in the `selector` element
     * of the provided CSS rule.
     *
     * Any dynamic pseudo-classes will be assumed to apply. If the selector matches a pseudo-element,
     * it will test for a match with its originating element.
     *
     * @param string[] $cssRule
     *
     * @return bool
     *
     * @throws SyntaxErrorException
     */
    private function existsMatchForSelectorInCssRule(array $cssRule)
    {
        $selector = $cssRule['selector'];
        if ($cssRule['hasUnmatchablePseudo']) {
            $selector = $this->removeUnmatchablePseudoComponents($selector);
        }
        return $this->existsMatchForCssSelector($selector);
    }

    /**
     * Checks whether there is at least one matching element for $cssSelector.
     * When not in debug mode, it returns true also for invalid selectors (because they may be valid,
     * just not implemented/recognized yet by Emogrifier).
     *
     * @param string $cssSelector
     *
     * @return bool
     *
     * @throws SyntaxErrorException
     */
    private function existsMatchForCssSelector($cssSelector)
    {
        try {
            $nodesMatchingSelector = $this->xPath->query($this->getCssSelectorConverter()->toXPath($cssSelector));
        } catch (SyntaxErrorException $e) {
            if ($this->debug) {
                throw $e;
            }
            return true;
        }

        return $nodesMatchingSelector !== false && $nodesMatchingSelector->length !== 0;
    }

    /**
     * Removes pseudo-elements and dynamic pseudo-classes from a CSS selector, replacing them with "*" if necessary.
     * If such a pseudo-component is within the argument of `:not`, the entire `:not` component is removed or replaced.
     *
     * @param string $selector
     *
     * @return string Selector which will match the relevant DOM elements if the pseudo-classes are assumed to apply,
     *         or in the case of pseudo-elements will match their originating element.
     */
    private function removeUnmatchablePseudoComponents($selector)
    {
        // The regex allows nested brackets via `(?2)`.
        // A space is temporarily prepended because the callback can't determine if the match was at the very start.
        $selectorWithoutNots = \ltrim(\preg_replace_callback(
            '/(\\s?+):not(\\([^()]*+(?:(?2)[^()]*+)*+\\))/i',
            [$this, 'replaceUnmatchableNotComponent'],
            ' ' . $selector
        ));

        $pseudoComponentMatcher = ':(?!' . self::PSEUDO_CLASS_MATCHER . '):?+[\\w\\-]++(?:\\([^\\)]*+\\))?+';
        // first pattern: a pseudo-component at the start or after whitespace is replaced with "*";
        // second pattern: any remaining pseudo-component is removed
        return \preg_replace(
            ['/(\\s|^)' . $pseudoComponentMatcher . '/i', '/' . $pseudoComponentMatcher . '/i'],
            ['$1*', ''],
            $selectorWithoutNots
        );
    }

    /**
     * Helps `removeUnmatchablePseudoComponents()` replace or remove a selector `:not(...)` component if its argument
     * contains pseudo-elements or dynamic pseudo-classes.
     *
     * @param string[] $matches array of elements matched by the regular expression
     *
     * @return string the full match if there were no unmatchable pseudo components within; otherwise, any preceding
     *         whitespace followed by "*", or an empty string if there was no preceding whitespace
     */
    private function replaceUnmatchableNotComponent(array $matches)
    {
        list($notComponentWithAnyPrecedingWhitespace, $anyPrecedingWhitespace, $notArgumentInBrackets) = $matches;

        $hasUnmatchablePseudo = \preg_match(
            '/:(?!' . self::PSEUDO_CLASS_MATCHER . ')[\\w\\-:]/i',
            $notArgumentInBrackets
        );

        if ($hasUnmatchablePseudo) {
            return $anyPrecedingWhitespace !== '' ? $anyPrecedingWhitespace . '*' : '';
        }
        return $notComponentWithAnyPrecedingWhitespace;
    }

    /**
     * Applies `$this->matchingUninlinableCssRules` to `$this->domDocument` by placing them as CSS in a `<style>`
     * element.
     *
     * @param string $cssImportRules This may contain any `@import` rules that should precede the CSS placed in the
     *        `<style>` element.  If there are no uninlinable CSS rules to copy there, a `<style>` element will be
     *        created containing just `$cssImportRules`.  `$cssImportRules` may be an empty string; if it is, and there
     *        are no uninlinable CSS rules, an empty `<style>` element will not be created.
     *
     * @return void
     */
    private function copyUninlinableCssToStyleNode($cssImportRules)
    {
        $css = $cssImportRules;

        // avoid including unneeded class dependency if there are no rules
        if ($this->matchingUninlinableCssRules !== []) {
            $cssConcatenator = new CssConcatenator();
            foreach ($this->matchingUninlinableCssRules as $cssRule) {
                $cssConcatenator->append([$cssRule['selector']], $cssRule['declarationsBlock'], $cssRule['media']);
            }
            $css .= $cssConcatenator->getCss();
        }

        // avoid adding empty style element
        if ($css !== '') {
            $this->addStyleElementToDocument($css);
        }
    }

    /**
     * Adds a style element with $css to $this->domDocument.
     *
     * This method is protected to allow overriding.
     *
     * @see https://github.com/MyIntervals/emogrifier/issues/103
     *
     * @param string $css
     *
     * @return void
     */
    protected function addStyleElementToDocument($css)
    {
        // The CSS is added as a text node instead of via the `$value` argument of `createElement()`: that argument
        // is parsed as markup, so a bare "&" in the CSS (e.g., in a URL query string) would be treated as the start
        // of an entity reference and trigger an "unterminated entity reference" warning, losing CSS content.
        $styleElement = $this->domDocument->createElement('style');
        $styleElement->appendChild($this->domDocument->createTextNode($css));
        $styleElement->setAttribute('type', 'text/css');

        $headElement = $this->getHeadElement();
        $headElement->appendChild($styleElement);
    }

    /**
     * Returns the HEAD element.
     *
     * This method assumes that there always is a HEAD element.
     *
     * @return \DOMElement
     */
    private function getHeadElement()
    {
        return $this->domDocument->getElementsByTagName('head')->item(0);
    }
}