1<?php
2
3namespace Pelago\Emogrifier;
4
5use Pelago\Emogrifier\HtmlProcessor\AbstractHtmlProcessor;
6use Pelago\Emogrifier\Utilities\CssConcatenator;
7use Symfony\Component\CssSelector\CssSelectorConverter;
8use Symfony\Component\CssSelector\Exception\SyntaxErrorException;
9
10/**
11 * This class provides functions for converting CSS styles into inline style attributes in your HTML code.
12 *
13 * For Emogrifier 3.0.0, this will be the successor to the \Pelago\Emogrifier class (which then will be deprecated).
14 *
15 * For more information, please see the README.md file.
16 *
17 * @author Cameron Brooks
18 * @author Jaime Prado
19 * @author Oliver Klee <github@oliverklee.de>
20 * @author Roman Ožana <ozana@omdesign.cz>
21 * @author Sander Kruger <s.kruger@invessel.com>
22 * @author Zoli Szabó <zoli.szabo+github@gmail.com>
23 */
24class CssInliner extends AbstractHtmlProcessor
25{
    /**
     * Index into `$caches` for the results of `parseCssRules()`, keyed by the MD5 hash of the CSS string.
     *
     * @var int
     */
    const CACHE_KEY_CSS = 0;

    /**
     * Index into `$caches` for the results of `getCssSelectorPrecedence()`, keyed by the MD5 hash of the selector.
     *
     * @var int
     */
    const CACHE_KEY_SELECTOR = 1;

    /**
     * Index into `$caches` for the results of `parseCssDeclarationsBlock()`, keyed by the declarations block itself.
     *
     * @var int
     */
    const CACHE_KEY_CSS_DECLARATIONS_BLOCK = 2;

    /**
     * Index into `$caches` for the results of `generateStyleStringFromDeclarationsArrays()`, keyed by the serialized
     * pair of input arrays.
     *
     * @var int
     */
    const CACHE_KEY_COMBINED_STYLES = 3;
45
46    /**
47     * Regular expression component matching a static pseudo class in a selector, without the preceding ":",
48     * for which the applicable elements can be determined (by converting the selector to an XPath expression).
49     * (Contains alternation without a group and is intended to be placed within a capturing, non-capturing or lookahead
50     * group, as appropriate for the usage context.)
51     *
52     * @var string
53     */
54    const PSEUDO_CLASS_MATCHER
55        = 'empty|(?:first|last|nth(?:-last)?+|only)-child|(?:first|last|nth(?:-last)?+)-of-type|not\\([[:ascii:]]*\\)';
56
    /**
     * the selectors whose matching nodes must not be styled, stored as array keys (the values are always true)
     *
     * @var bool[]
     */
    private $excludedSelectors = [];

    /**
     * the media types whose `@media` rules are kept, stored as array keys (the values are always true)
     *
     * @var bool[]
     */
    private $allowedMediaTypes = ['all' => true, 'screen' => true, 'print' => true];

    /**
     * the memoized intermediate results, one sub-array per `CACHE_KEY_*` constant; reset by `clearAllCaches()`
     *
     * @var mixed[]
     */
    private $caches = [
        self::CACHE_KEY_CSS => [],
        self::CACHE_KEY_SELECTOR => [],
        self::CACHE_KEY_CSS_DECLARATIONS_BLOCK => [],
        self::CACHE_KEY_COMBINED_STYLES => [],
    ];

    /**
     * the converter from CSS selectors to XPath; null until `getCssSelectorConverter()` creates it on first use
     *
     * @var CssSelectorConverter
     */
    private $cssSelectorConverter = null;
81
82    /**
83     * the visited nodes with the XPath paths as array keys
84     *
85     * @var \DOMElement[]
86     */
87    private $visitedNodes = [];
88
89    /**
90     * the styles to apply to the nodes with the XPath paths as array keys for the outer array
91     * and the attribute names/values as key/value pairs for the inner array
92     *
93     * @var string[][]
94     */
95    private $styleAttributesForNodes = [];
96
97    /**
     * Determines whether the "style" attributes of tags in the HTML passed to this class should be preserved.
99     * If set to false, the value of the style attributes will be discarded.
100     *
101     * @var bool
102     */
103    private $isInlineStyleAttributesParsingEnabled = true;
104
105    /**
106     * Determines whether the <style> blocks in the HTML passed to this class should be parsed.
107     *
108     * If set to true, the <style> blocks will be removed from the HTML and their contents will be applied to the HTML
109     * via inline styles.
110     *
111     * If set to false, the <style> blocks will be left as they are in the HTML.
112     *
113     * @var bool
114     */
115    private $isStyleBlocksParsingEnabled = true;
116
117    /**
118     * For calculating selector precedence order.
119     * Keys are a regular expression part to match before a CSS name.
120     * Values are a multiplier factor per match to weight specificity.
121     *
122     * @var int[]
123     */
124    private $selectorPrecedenceMatchers = [
125        // IDs: worth 10000
126        '\\#' => 10000,
127        // classes, attributes, pseudo-classes (not pseudo-elements) except `:not`: worth 100
128        '(?:\\.|\\[|(?<!:):(?!not\\())' => 100,
129        // elements (not attribute values or `:not`), pseudo-elements: worth 1
130        '(?:(?<![="\':\\w\\-])|::)' => 1,
131    ];
132
133    /**
134     * array of data describing CSS rules which apply to the document but cannot be inlined, in the format returned by
135     * `parseCssRules`
136     *
137     * @var string[][]
138     */
139    private $matchingUninlinableCssRules = null;
140
141    /**
142     * Emogrifier will throw Exceptions when it encounters an error instead of silently ignoring them.
143     *
144     * @var bool
145     */
146    private $debug = false;
147
148    /**
149     * Inlines the given CSS into the existing HTML.
150     *
151     * @param string $css the CSS to inline, must be UTF-8-encoded
152     *
153     * @return self fluent interface
154     *
155     * @throws SyntaxErrorException
156     */
157    public function inlineCss($css = '')
158    {
159        $this->clearAllCaches();
160        $this->purgeVisitedNodes();
161
162        $this->normalizeStyleAttributesOfAllNodes();
163
164        $combinedCss = $css;
165        // grab any existing style blocks from the HTML and append them to the existing CSS
166        // (these blocks should be appended so as to have precedence over conflicting styles in the existing CSS)
167        if ($this->isStyleBlocksParsingEnabled) {
168            $combinedCss .= $this->getCssFromAllStyleNodes();
169        }
170
171        $cssWithoutComments = $this->removeCssComments($combinedCss);
172        list($cssWithoutCommentsCharsetOrImport, $cssImportRules)
173            = $this->extractImportAndCharsetRules($cssWithoutComments);
174
175        $excludedNodes = $this->getNodesToExclude();
176        $cssRules = $this->parseCssRules($cssWithoutCommentsCharsetOrImport);
177        $cssSelectorConverter = $this->getCssSelectorConverter();
178        foreach ($cssRules['inlinable'] as $cssRule) {
179            try {
180                $nodesMatchingCssSelectors = $this->xPath->query($cssSelectorConverter->toXPath($cssRule['selector']));
181            } catch (SyntaxErrorException $e) {
182                if ($this->debug) {
183                    throw $e;
184                }
185                continue;
186            }
187
188            /** @var \DOMElement $node */
189            foreach ($nodesMatchingCssSelectors as $node) {
190                if (\in_array($node, $excludedNodes, true)) {
191                    continue;
192                }
193                $this->copyInlinableCssToStyleAttribute($node, $cssRule);
194            }
195        }
196
197        if ($this->isInlineStyleAttributesParsingEnabled) {
198            $this->fillStyleAttributesWithMergedStyles();
199        }
200
201        $this->removeImportantAnnotationFromAllInlineStyles();
202
203        $this->determineMatchingUninlinableCssRules($cssRules['uninlinable']);
204        $this->copyUninlinableCssToStyleNode($cssImportRules);
205
206        return $this;
207    }
208
209    /**
210     * Disables the parsing of inline styles.
211     *
212     * @return void
213     */
214    public function disableInlineStyleAttributesParsing()
215    {
216        $this->isInlineStyleAttributesParsingEnabled = false;
217    }
218
219    /**
220     * Disables the parsing of <style> blocks.
221     *
222     * @return void
223     */
224    public function disableStyleBlocksParsing()
225    {
226        $this->isStyleBlocksParsingEnabled = false;
227    }
228
229    /**
230     * Marks a media query type to keep.
231     *
232     * @param string $mediaName the media type name, e.g., "braille"
233     *
234     * @return void
235     */
236    public function addAllowedMediaType($mediaName)
237    {
238        $this->allowedMediaTypes[$mediaName] = true;
239    }
240
241    /**
242     * Drops a media query type from the allowed list.
243     *
244     * @param string $mediaName the tag name, e.g., "braille"
245     *
246     * @return void
247     */
248    public function removeAllowedMediaType($mediaName)
249    {
250        if (isset($this->allowedMediaTypes[$mediaName])) {
251            unset($this->allowedMediaTypes[$mediaName]);
252        }
253    }
254
255    /**
256     * Adds a selector to exclude nodes from emogrification.
257     *
258     * Any nodes that match the selector will not have their style altered.
259     *
260     * @param string $selector the selector to exclude, e.g., ".editor"
261     *
262     * @return void
263     */
264    public function addExcludedSelector($selector)
265    {
266        $this->excludedSelectors[$selector] = true;
267    }
268
269    /**
270     * No longer excludes the nodes matching this selector from emogrification.
271     *
272     * @param string $selector the selector to no longer exclude, e.g., ".editor"
273     *
274     * @return void
275     */
276    public function removeExcludedSelector($selector)
277    {
278        if (isset($this->excludedSelectors[$selector])) {
279            unset($this->excludedSelectors[$selector]);
280        }
281    }
282
283    /**
284     * Sets the debug mode.
285     *
286     * @param bool $debug set to true to enable debug mode
287     *
288     * @return void
289     */
290    public function setDebug($debug)
291    {
292        $this->debug = $debug;
293    }
294
295    /**
296     * Gets the array of selectors present in the CSS provided to `inlineCss()` for which the declarations could not be
297     * applied as inline styles, but which may affect elements in the HTML.  The relevant CSS will have been placed in a
298     * `<style>` element.  The selectors may include those used within `@media` rules or those involving dynamic
299     * pseudo-classes (such as `:hover`) or pseudo-elements (such as `::after`).
300     *
301     * @return string[]
302     *
303     * @throws \BadMethodCallException if `inlineCss` has not been called first
304     */
305    public function getMatchingUninlinableSelectors()
306    {
307        if ($this->matchingUninlinableCssRules === null) {
308            throw new \BadMethodCallException('inlineCss must be called first', 1568385221);
309        }
310
311        return \array_column($this->matchingUninlinableCssRules, 'selector');
312    }
313
314    /**
315     * Clears all caches.
316     *
317     * @return void
318     */
319    private function clearAllCaches()
320    {
321        $this->caches = [
322            self::CACHE_KEY_CSS => [],
323            self::CACHE_KEY_SELECTOR => [],
324            self::CACHE_KEY_CSS_DECLARATIONS_BLOCK => [],
325            self::CACHE_KEY_COMBINED_STYLES => [],
326        ];
327    }
328
329    /**
330     * Purges the visited nodes.
331     *
332     * @return void
333     */
334    private function purgeVisitedNodes()
335    {
336        $this->visitedNodes = [];
337        $this->styleAttributesForNodes = [];
338    }
339
340    /**
341     * Parses the document and normalizes all existing CSS attributes.
342     * This changes 'DISPLAY: none' to 'display: none'.
343     * We wouldn't have to do this if DOMXPath supported XPath 2.0.
344     * Also stores a reference of nodes with existing inline styles so we don't overwrite them.
345     *
346     * @return void
347     */
348    private function normalizeStyleAttributesOfAllNodes()
349    {
350        /** @var \DOMElement $node */
351        foreach ($this->getAllNodesWithStyleAttribute() as $node) {
352            if ($this->isInlineStyleAttributesParsingEnabled) {
353                $this->normalizeStyleAttributes($node);
354            }
355            // Remove style attribute in every case, so we can add them back (if inline style attributes
356            // parsing is enabled) to the end of the style list, thus keeping the right priority of CSS rules;
357            // else original inline style rules may remain at the beginning of the final inline style definition
358            // of a node, which may give not the desired results
359            $node->removeAttribute('style');
360        }
361    }
362
363    /**
364     * Returns a list with all DOM nodes that have a style attribute.
365     *
366     * @return \DOMNodeList
367     */
368    private function getAllNodesWithStyleAttribute()
369    {
370        return $this->xPath->query('//*[@style]');
371    }
372
373    /**
374     * Normalizes the value of the "style" attribute and saves it.
375     *
376     * @param \DOMElement $node
377     *
378     * @return void
379     */
380    private function normalizeStyleAttributes(\DOMElement $node)
381    {
382        $normalizedOriginalStyle = \preg_replace_callback(
383            '/-?+[_a-zA-Z][\\w\\-]*+(?=:)/S',
384            static function (array $m) {
385                return \strtolower($m[0]);
386            },
387            $node->getAttribute('style')
388        );
389
390        // in order to not overwrite existing style attributes in the HTML, we
391        // have to save the original HTML styles
392        $nodePath = $node->getNodePath();
393        if (!isset($this->styleAttributesForNodes[$nodePath])) {
394            $this->styleAttributesForNodes[$nodePath] = $this->parseCssDeclarationsBlock($normalizedOriginalStyle);
395            $this->visitedNodes[$nodePath] = $node;
396        }
397
398        $node->setAttribute('style', $normalizedOriginalStyle);
399    }
400
401    /**
402     * Parses a CSS declaration block into property name/value pairs.
403     *
404     * Example:
405     *
406     * The declaration block
407     *
408     *   "color: #000; font-weight: bold;"
409     *
410     * will be parsed into the following array:
411     *
412     *   "color" => "#000"
413     *   "font-weight" => "bold"
414     *
415     * @param string $cssDeclarationsBlock the CSS declarations block without the curly braces, may be empty
416     *
417     * @return string[]
418     *         the CSS declarations with the property names as array keys and the property values as array values
419     */
420    private function parseCssDeclarationsBlock($cssDeclarationsBlock)
421    {
422        if (isset($this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock])) {
423            return $this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock];
424        }
425
426        $properties = [];
427        foreach (\preg_split('/;(?!base64|charset)/', $cssDeclarationsBlock) as $declaration) {
428            $matches = [];
429            if (!\preg_match('/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/s', \trim($declaration), $matches)) {
430                continue;
431            }
432
433            $propertyName = \strtolower($matches[1]);
434            $propertyValue = $matches[2];
435            $properties[$propertyName] = $propertyValue;
436        }
437        $this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock] = $properties;
438
439        return $properties;
440    }
441
442    /**
443     * Returns CSS content.
444     *
445     * @return string
446     */
447    private function getCssFromAllStyleNodes()
448    {
449        $styleNodes = $this->xPath->query('//style');
450        if ($styleNodes === false) {
451            return '';
452        }
453
454        $css = '';
455        /** @var \DOMNode $styleNode */
456        foreach ($styleNodes as $styleNode) {
457            $css .= "\n\n" . $styleNode->nodeValue;
458            $styleNode->parentNode->removeChild($styleNode);
459        }
460
461        return $css;
462    }
463
464    /**
465     * Removes comments from the supplied CSS.
466     *
467     * @param string $css
468     *
469     * @return string CSS with the comments removed
470     */
471    private function removeCssComments($css)
472    {
473        return \preg_replace('%/\\*[^*]*+(?:\\*(?!/)[^*]*+)*+\\*/%', '', $css);
474    }
475
476    /**
477     * Extracts `@import` and `@charset` rules from the supplied CSS.  These rules must not be preceded by any other
478     * rules, or they will be ignored.  (From the CSS 2.1 specification: "CSS 2.1 user agents must ignore any '@import'
479     * rule that occurs inside a block or after any non-ignored statement other than an @charset or an @import rule."
480     * Note also that `@charset` is case sensitive whereas `@import` is not.)
481     *
482     * @param string $css CSS with comments removed
483     *
484     * @return string[] The first element is the CSS with the valid `@import` and `@charset` rules removed.  The second
485     * element contains a concatenation of the valid `@import` rules, each followed by whatever whitespace followed it
486     * in the original CSS (so that either unminified or minified formatting is preserved); if there were no `@import`
487     * rules, it will be an empty string.  The (valid) `@charset` rules are discarded.
488     */
489    private function extractImportAndCharsetRules($css)
490    {
491        $possiblyModifiedCss = $css;
492        $importRules = '';
493
494        while (
495            \preg_match(
496                '/^\\s*+(@((?i)import(?-i)|charset)\\s[^;]++;\\s*+)/',
497                $possiblyModifiedCss,
498                $matches
499            )
500        ) {
501            list($fullMatch, $atRuleAndFollowingWhitespace, $atRuleName) = $matches;
502
503            if (\strtolower($atRuleName) === 'import') {
504                $importRules .= $atRuleAndFollowingWhitespace;
505            }
506
507            $possiblyModifiedCss = \substr($possiblyModifiedCss, \strlen($fullMatch));
508        }
509
510        return [$possiblyModifiedCss, $importRules];
511    }
512
513    /**
514     * Find the nodes that are not to be emogrified.
515     *
516     * @return \DOMElement[]
517     *
518     * @throws SyntaxErrorException
519     */
520    private function getNodesToExclude()
521    {
522        $excludedNodes = [];
523        foreach (\array_keys($this->excludedSelectors) as $selectorToExclude) {
524            try {
525                $matchingNodes = $this->xPath->query($this->getCssSelectorConverter()->toXPath($selectorToExclude));
526            } catch (SyntaxErrorException $e) {
527                if ($this->debug) {
528                    throw $e;
529                }
530                continue;
531            }
532            foreach ($matchingNodes as $node) {
533                $excludedNodes[] = $node;
534            }
535        }
536
537        return $excludedNodes;
538    }
539
540    /**
541     * @return CssSelectorConverter
542     */
543    private function getCssSelectorConverter()
544    {
545        if ($this->cssSelectorConverter === null) {
546            $this->cssSelectorConverter = new CssSelectorConverter();
547        }
548
549        return $this->cssSelectorConverter;
550    }
551
552    /**
553     * Extracts and parses the individual rules from a CSS string.
554     *
555     * @param string $css a string of raw CSS code with comments removed
556     *
557     * @return string[][][] A 2-entry array with the key "inlinable" containing rules which can be inlined as `style`
558     *         attributes and the key "uninlinable" containing rules which cannot.  Each value is an array of string
559     *         sub-arrays with the keys
560     *         "media" (the media query string, e.g. "@media screen and (max-width: 480px)",
561     *         or an empty string if not from a `@media` rule),
562     *         "selector" (the CSS selector, e.g., "*" or "header h1"),
563     *         "hasUnmatchablePseudo" (true if that selector contains pseudo-elements or dynamic pseudo-classes
564     *         such that the declarations cannot be applied inline),
565     *         "declarationsBlock" (the semicolon-separated CSS declarations for that selector,
566     *         e.g., "color: red; height: 4px;"),
567     *         and "line" (the line number e.g. 42)
568     */
569    private function parseCssRules($css)
570    {
571        $cssKey = \md5($css);
572        if (isset($this->caches[self::CACHE_KEY_CSS][$cssKey])) {
573            return $this->caches[self::CACHE_KEY_CSS][$cssKey];
574        }
575
576        $matches = $this->getCssRuleMatches($css);
577
578        $cssRules = [
579            'inlinable' => [],
580            'uninlinable' => [],
581        ];
582        /** @var string[][] $matches */
583        /** @var string[] $cssRule */
584        foreach ($matches as $key => $cssRule) {
585            $cssDeclaration = \trim($cssRule['declarations']);
586            if ($cssDeclaration === '') {
587                continue;
588            }
589
590            foreach (\explode(',', $cssRule['selectors']) as $selector) {
591                // don't process pseudo-elements and behavioral (dynamic) pseudo-classes;
592                // only allow structural pseudo-classes
593                $hasPseudoElement = \strpos($selector, '::') !== false;
594                $hasUnsupportedPseudoClass = (bool)\preg_match(
595                    '/:(?!' . self::PSEUDO_CLASS_MATCHER . ')[\\w\\-]/i',
596                    $selector
597                );
598                $hasUnmatchablePseudo = $hasPseudoElement || $hasUnsupportedPseudoClass;
599
600                $parsedCssRule = [
601                    'media' => $cssRule['media'],
602                    'selector' => \trim($selector),
603                    'hasUnmatchablePseudo' => $hasUnmatchablePseudo,
604                    'declarationsBlock' => $cssDeclaration,
605                    // keep track of where it appears in the file, since order is important
606                    'line' => $key,
607                ];
608                $ruleType = ($cssRule['media'] === '' && !$hasUnmatchablePseudo) ? 'inlinable' : 'uninlinable';
609                $cssRules[$ruleType][] = $parsedCssRule;
610            }
611        }
612
613        \usort($cssRules['inlinable'], [$this, 'sortBySelectorPrecedence']);
614
615        $this->caches[self::CACHE_KEY_CSS][$cssKey] = $cssRules;
616
617        return $cssRules;
618    }
619
620    /**
621     * @param string[] $a
622     * @param string[] $b
623     *
624     * @return int
625     */
626    private function sortBySelectorPrecedence(array $a, array $b)
627    {
628        $precedenceA = $this->getCssSelectorPrecedence($a['selector']);
629        $precedenceB = $this->getCssSelectorPrecedence($b['selector']);
630
631        // We want these sorted in ascending order so selectors with lesser precedence get processed first and
632        // selectors with greater precedence get sorted last.
633        $precedenceForEquals = ($a['line'] < $b['line'] ? -1 : 1);
634        $precedenceForNotEquals = ($precedenceA < $precedenceB ? -1 : 1);
635        return ($precedenceA === $precedenceB) ? $precedenceForEquals : $precedenceForNotEquals;
636    }
637
638    /**
639     * @param string $selector
640     *
641     * @return int
642     */
643    private function getCssSelectorPrecedence($selector)
644    {
645        $selectorKey = \md5($selector);
646        if (isset($this->caches[self::CACHE_KEY_SELECTOR][$selectorKey])) {
647            return $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey];
648        }
649
650        $precedence = 0;
651        foreach ($this->selectorPrecedenceMatchers as $matcher => $value) {
652            if (\trim($selector) === '') {
653                break;
654            }
655            $number = 0;
656            $selector = \preg_replace('/' . $matcher . '\\w+/', '', $selector, -1, $number);
657            $precedence += ($value * $number);
658        }
659        $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey] = $precedence;
660
661        return $precedence;
662    }
663
664    /**
665     * Parses a string of CSS into the media query, selectors and declarations for each ruleset in order.
666     *
667     * @param string $css CSS with comments removed
668     *
669     * @return string[][] Array of string sub-arrays with the keys
670     *         "media" (the media query string, e.g. "@media screen and (max-width: 480px)",
671     *         or an empty string if not from an `@media` rule),
672     *         "selectors" (the CSS selector(s), e.g., "*" or "h1, h2"),
673     *         "declarations" (the semicolon-separated CSS declarations for that/those selector(s),
674     *         e.g., "color: red; height: 4px;"),
675     */
676    private function getCssRuleMatches($css)
677    {
678        $splitCss = $this->splitCssAndMediaQuery($css);
679
680        $ruleMatches = [];
681        foreach ($splitCss as $cssPart) {
682            // process each part for selectors and definitions
683            \preg_match_all('/(?:^|[\\s^{}]*)([^{]+){([^}]*)}/mi', $cssPart['css'], $matches, PREG_SET_ORDER);
684
685            /** @var string[][] $matches */
686            foreach ($matches as $cssRule) {
687                $ruleMatches[] = [
688                    'media' => $cssPart['media'],
689                    'selectors' => $cssRule[1],
690                    'declarations' => $cssRule[2],
691                ];
692            }
693        }
694
695        return $ruleMatches;
696    }
697
698    /**
699     * Splits input CSS code into an array of parts for different media queries, in order.
700     * Each part is an array where:
701     *
702     * - key "css" will contain clean CSS code (for @media rules this will be the group rule body within "{...}")
703     * - key "media" will contain "@media " followed by the media query list, for all allowed media queries,
704     *   or an empty string for CSS not within a media query
705     *
706     * Example:
707     *
708     * The CSS code
709     *
710     *   "@import "file.css"; h1 { color:red; } @media { h1 {}} @media tv { h1 {}}"
711     *
712     * will be parsed into the following array:
713     *
714     *   0 => [
715     *     "css" => "h1 { color:red; }",
716     *     "media" => ""
717     *   ],
718     *   1 => [
719     *     "css" => " h1 {}",
720     *     "media" => "@media "
721     *   ]
722     *
723     * @param string $css
724     *
725     * @return string[][]
726     */
727    private function splitCssAndMediaQuery($css)
728    {
729        $mediaTypesExpression = '';
730        if (!empty($this->allowedMediaTypes)) {
731            $mediaTypesExpression = '|' . \implode('|', \array_keys($this->allowedMediaTypes));
732        }
733
734        $mediaRuleBodyMatcher = '[^{]*+{(?:[^{}]*+{.*})?\\s*+}\\s*+';
735
736        $cssSplitForAllowedMediaTypes = \preg_split(
737            '#(@media\\s++(?:only\\s++)?+(?:(?=[{(])' . $mediaTypesExpression . ')' . $mediaRuleBodyMatcher
738            . ')#misU',
739            $css,
740            -1,
741            PREG_SPLIT_DELIM_CAPTURE
742        );
743
744        // filter the CSS outside/between allowed @media rules
745        $cssCleaningMatchers = [
746            'import/charset directives' => '/\\s*+@(?:import|charset)\\s[^;]++;/i',
747            'remaining media enclosures' => '/\\s*+@media\\s' . $mediaRuleBodyMatcher . '/isU',
748        ];
749
750        $splitCss = [];
751        foreach ($cssSplitForAllowedMediaTypes as $index => $cssPart) {
752            $isMediaRule = $index % 2 !== 0;
753            if ($isMediaRule) {
754                \preg_match('/^([^{]*+){(.*)}[^}]*+$/s', $cssPart, $matches);
755                $splitCss[] = [
756                    'css' => $matches[2],
757                    'media' => $matches[1],
758                ];
759            } else {
760                $cleanedCss = \trim(\preg_replace($cssCleaningMatchers, '', $cssPart));
761                if ($cleanedCss !== '') {
762                    $splitCss[] = [
763                        'css' => $cleanedCss,
764                        'media' => '',
765                    ];
766                }
767            }
768        }
769        return $splitCss;
770    }
771
772    /**
773     * Copies $cssRule into the style attribute of $node.
774     *
775     * Note: This method does not check whether $cssRule matches $node.
776     *
777     * @param \DOMElement $node
778     * @param string[][] $cssRule
779     *
780     * @return void
781     */
782    private function copyInlinableCssToStyleAttribute(\DOMElement $node, array $cssRule)
783    {
784        $newStyleDeclarations = $this->parseCssDeclarationsBlock($cssRule['declarationsBlock']);
785        if ($newStyleDeclarations === []) {
786            return;
787        }
788
789        // if it has a style attribute, get it, process it, and append (overwrite) new stuff
790        if ($node->hasAttribute('style')) {
791            // break it up into an associative array
792            $oldStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
793        } else {
794            $oldStyleDeclarations = [];
795        }
796        $node->setAttribute(
797            'style',
798            $this->generateStyleStringFromDeclarationsArrays($oldStyleDeclarations, $newStyleDeclarations)
799        );
800    }
801
802    /**
803     * This method merges old or existing name/value array with new name/value array
804     * and then generates a string of the combined style suitable for placing inline.
805     * This becomes the single point for CSS string generation allowing for consistent
806     * CSS output no matter where the CSS originally came from.
807     *
808     * @param string[] $oldStyles
809     * @param string[] $newStyles
810     *
811     * @return string
812     */
813    private function generateStyleStringFromDeclarationsArrays(array $oldStyles, array $newStyles)
814    {
815        $cacheKey = \serialize([$oldStyles, $newStyles]);
816        if (isset($this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey])) {
817            return $this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey];
818        }
819
820        // Unset the overridden styles to preserve order, important if shorthand and individual properties are mixed
821        foreach ($oldStyles as $attributeName => $attributeValue) {
822            if (!isset($newStyles[$attributeName])) {
823                continue;
824            }
825
826            $newAttributeValue = $newStyles[$attributeName];
827            if (
828                $this->attributeValueIsImportant($attributeValue)
829                && !$this->attributeValueIsImportant($newAttributeValue)
830            ) {
831                unset($newStyles[$attributeName]);
832            } else {
833                unset($oldStyles[$attributeName]);
834            }
835        }
836
837        $combinedStyles = \array_merge($oldStyles, $newStyles);
838
839        $style = '';
840        foreach ($combinedStyles as $attributeName => $attributeValue) {
841            $style .= \strtolower(\trim($attributeName)) . ': ' . \trim($attributeValue) . '; ';
842        }
843        $trimmedStyle = \rtrim($style);
844
845        $this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey] = $trimmedStyle;
846
847        return $trimmedStyle;
848    }
849
850    /**
851     * Checks whether $attributeValue is marked as !important.
852     *
853     * @param string $attributeValue
854     *
855     * @return bool
856     */
857    private function attributeValueIsImportant($attributeValue)
858    {
859        return \strtolower(\substr(\trim($attributeValue), -10)) === '!important';
860    }
861
862    /**
863     * Merges styles from styles attributes and style nodes and applies them to the attribute nodes
864     *
865     * @return void
866     */
867    private function fillStyleAttributesWithMergedStyles()
868    {
869        foreach ($this->styleAttributesForNodes as $nodePath => $styleAttributesForNode) {
870            $node = $this->visitedNodes[$nodePath];
871            $currentStyleAttributes = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
872            $node->setAttribute(
873                'style',
874                $this->generateStyleStringFromDeclarationsArrays(
875                    $currentStyleAttributes,
876                    $styleAttributesForNode
877                )
878            );
879        }
880    }
881
882    /**
883     * Searches for all nodes with a style attribute and removes the "!important" annotations out of
884     * the inline style declarations, eventually by rearranging declarations.
885     *
886     * @return void
887     */
888    private function removeImportantAnnotationFromAllInlineStyles()
889    {
890        foreach ($this->getAllNodesWithStyleAttribute() as $node) {
891            $this->removeImportantAnnotationFromNodeInlineStyle($node);
892        }
893    }
894
895    /**
896     * Removes the "!important" annotations out of the inline style declarations,
897     * eventually by rearranging declarations.
898     * Rearranging needed when !important shorthand properties are followed by some of their
899     * not !important expanded-version properties.
900     * For example "font: 12px serif !important; font-size: 13px;" must be reordered
901     * to "font-size: 13px; font: 12px serif;" in order to remain correct.
902     *
903     * @param \DOMElement $node
904     *
905     * @return void
906     */
907    private function removeImportantAnnotationFromNodeInlineStyle(\DOMElement $node)
908    {
909        $inlineStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
910        $regularStyleDeclarations = [];
911        $importantStyleDeclarations = [];
912        foreach ($inlineStyleDeclarations as $property => $value) {
913            if ($this->attributeValueIsImportant($value)) {
914                $importantStyleDeclarations[$property] = \trim(\str_replace('!important', '', $value));
915            } else {
916                $regularStyleDeclarations[$property] = $value;
917            }
918        }
919        $inlineStyleDeclarationsInNewOrder = \array_merge(
920            $regularStyleDeclarations,
921            $importantStyleDeclarations
922        );
923        $node->setAttribute(
924            'style',
925            $this->generateStyleStringFromSingleDeclarationsArray($inlineStyleDeclarationsInNewOrder)
926        );
927    }
928
929    /**
930     * Generates a CSS style string suitable to be used inline from the $styleDeclarations property => value array.
931     *
932     * @param string[] $styleDeclarations
933     *
934     * @return string
935     */
936    private function generateStyleStringFromSingleDeclarationsArray(array $styleDeclarations)
937    {
938        return $this->generateStyleStringFromDeclarationsArrays([], $styleDeclarations);
939    }
940
941    /**
942     * Determines which of `$cssRules` actually apply to `$this->domDocument`, and sets them in
943     * `$this->matchingUninlinableCssRules`.
944     *
945     * @param string[][] $cssRules the "uninlinable" array of CSS rules returned by `parseCssRules`
946     *
947     * @return void
948     */
949    private function determineMatchingUninlinableCssRules(array $cssRules)
950    {
951        $this->matchingUninlinableCssRules = \array_filter($cssRules, [$this, 'existsMatchForSelectorInCssRule']);
952    }
953
954    /**
955     * Checks whether there is at least one matching element for the CSS selector contained in the `selector` element
956     * of the provided CSS rule.
957     *
958     * Any dynamic pseudo-classes will be assumed to apply. If the selector matches a pseudo-element,
959     * it will test for a match with its originating element.
960     *
961     * @param string[] $cssRule
962     *
963     * @return bool
964     *
965     * @throws SyntaxErrorException
966     */
967    private function existsMatchForSelectorInCssRule(array $cssRule)
968    {
969        $selector = $cssRule['selector'];
970        if ($cssRule['hasUnmatchablePseudo']) {
971            $selector = $this->removeUnmatchablePseudoComponents($selector);
972        }
973        return $this->existsMatchForCssSelector($selector);
974    }
975
976    /**
977     * Checks whether there is at least one matching element for $cssSelector.
978     * When not in debug mode, it returns true also for invalid selectors (because they may be valid,
979     * just not implemented/recognized yet by Emogrifier).
980     *
981     * @param string $cssSelector
982     *
983     * @return bool
984     *
985     * @throws SyntaxErrorException
986     */
987    private function existsMatchForCssSelector($cssSelector)
988    {
989        try {
990            $nodesMatchingSelector = $this->xPath->query($this->getCssSelectorConverter()->toXPath($cssSelector));
991        } catch (SyntaxErrorException $e) {
992            if ($this->debug) {
993                throw $e;
994            }
995            return true;
996        }
997
998        return $nodesMatchingSelector !== false && $nodesMatchingSelector->length !== 0;
999    }
1000
1001    /**
1002     * Removes pseudo-elements and dynamic pseudo-classes from a CSS selector, replacing them with "*" if necessary.
1003     * If such a pseudo-component is within the argument of `:not`, the entire `:not` component is removed or replaced.
1004     *
1005     * @param string $selector
1006     *
1007     * @return string Selector which will match the relevant DOM elements if the pseudo-classes are assumed to apply,
1008     *                or in the case of pseudo-elements will match their originating element.
1009     */
1010    private function removeUnmatchablePseudoComponents($selector)
1011    {
1012        // The regex allows nested brackets via `(?2)`.
1013        // A space is temporarily prepended because the callback can't determine if the match was at the very start.
1014        $selectorWithoutNots = \ltrim(\preg_replace_callback(
1015            '/(\\s?+):not(\\([^()]*+(?:(?2)[^()]*+)*+\\))/i',
1016            [$this, 'replaceUnmatchableNotComponent'],
1017            ' ' . $selector
1018        ));
1019
1020        $pseudoComponentMatcher = ':(?!' . self::PSEUDO_CLASS_MATCHER . '):?+[\\w\\-]++(?:\\([^\\)]*+\\))?+';
1021        return \preg_replace(
1022            ['/(\\s|^)' . $pseudoComponentMatcher . '/i', '/' . $pseudoComponentMatcher . '/i'],
1023            ['$1*', ''],
1024            $selectorWithoutNots
1025        );
1026    }
1027
1028    /**
1029     * Helps `removeUnmatchablePseudoComponents()` replace or remove a selector `:not(...)` component if its argument
1030     * contains pseudo-elements or dynamic pseudo-classes.
1031     *
1032     * @param string[] $matches array of elements matched by the regular expression
1033     *
1034     * @return string the full match if there were no unmatchable pseudo components within; otherwise, any preceding
1035     *         whitespace followed by "*", or an empty string if there was no preceding whitespace
1036     */
1037    private function replaceUnmatchableNotComponent(array $matches)
1038    {
1039        list($notComponentWithAnyPrecedingWhitespace, $anyPrecedingWhitespace, $notArgumentInBrackets) = $matches;
1040
1041        $hasUnmatchablePseudo = \preg_match(
1042            '/:(?!' . self::PSEUDO_CLASS_MATCHER . ')[\\w\\-:]/i',
1043            $notArgumentInBrackets
1044        );
1045
1046        if ($hasUnmatchablePseudo) {
1047            return $anyPrecedingWhitespace !== '' ? $anyPrecedingWhitespace . '*' : '';
1048        }
1049        return $notComponentWithAnyPrecedingWhitespace;
1050    }
1051
1052    /**
1053     * Applies `$this->matchingUninlinableCssRules` to `$this->domDocument` by placing them as CSS in a `<style>`
1054     * element.
1055     *
1056     * @param string $cssImportRules This may contain any `@import` rules that should precede the CSS placed in the
1057     *        `<style>` element.  If there are no unlinlinable CSS rules to copy there, a `<style>` element will be
1058     *        created containing just `$cssImportRules`.  `$cssImportRules` may be an empty string; if it is, and there
1059     *        are no unlinlinable CSS rules, an empty `<style>` element will not be created.
1060     *
1061     * @return void
1062     */
1063    private function copyUninlinableCssToStyleNode($cssImportRules)
1064    {
1065        $css = $cssImportRules;
1066
1067        // avoid including unneeded class dependency if there are no rules
1068        if ($this->matchingUninlinableCssRules !== []) {
1069            $cssConcatenator = new CssConcatenator();
1070            foreach ($this->matchingUninlinableCssRules as $cssRule) {
1071                $cssConcatenator->append([$cssRule['selector']], $cssRule['declarationsBlock'], $cssRule['media']);
1072            }
1073            $css .= $cssConcatenator->getCss();
1074        }
1075
1076        // avoid adding empty style element
1077        if ($css !== '') {
1078            $this->addStyleElementToDocument($css);
1079        }
1080    }
1081
1082    /**
1083     * Adds a style element with $css to $this->domDocument.
1084     *
1085     * This method is protected to allow overriding.
1086     *
1087     * @see https://github.com/MyIntervals/emogrifier/issues/103
1088     *
1089     * @param string $css
1090     *
1091     * @return void
1092     */
1093    protected function addStyleElementToDocument($css)
1094    {
1095        $styleElement = $this->domDocument->createElement('style', $css);
1096        $styleAttribute = $this->domDocument->createAttribute('type');
1097        $styleAttribute->value = 'text/css';
1098        $styleElement->appendChild($styleAttribute);
1099
1100        $headElement = $this->getHeadElement();
1101        $headElement->appendChild($styleElement);
1102    }
1103
1104    /**
1105     * Returns the HEAD element.
1106     *
1107     * This method assumes that there always is a HEAD element.
1108     *
1109     * @return \DOMElement
1110     */
1111    private function getHeadElement()
1112    {
1113        return $this->domDocument->getElementsByTagName('head')->item(0);
1114    }
1115}
1116