1<?php
2
3namespace Sabberworm\CSS;
4
5use Sabberworm\CSS\CSSList\CSSList;
6use Sabberworm\CSS\CSSList\Document;
7use Sabberworm\CSS\CSSList\KeyFrame;
8use Sabberworm\CSS\Property\AtRule;
9use Sabberworm\CSS\Property\Import;
10use Sabberworm\CSS\Property\Charset;
11use Sabberworm\CSS\Property\CSSNamespace;
12use Sabberworm\CSS\RuleSet\AtRuleSet;
13use Sabberworm\CSS\CSSList\AtRuleBlockList;
14use Sabberworm\CSS\RuleSet\DeclarationBlock;
15use Sabberworm\CSS\Value\CSSFunction;
16use Sabberworm\CSS\Value\RuleValueList;
17use Sabberworm\CSS\Value\Size;
18use Sabberworm\CSS\Value\Color;
19use Sabberworm\CSS\Value\URL;
20use Sabberworm\CSS\Value\CSSString;
21use Sabberworm\CSS\Rule\Rule;
22use Sabberworm\CSS\Parsing\UnexpectedTokenException;
23
24/**
25 * Parser class parses CSS from text into a data structure.
26 */
27class Parser {
28
29	private $aText;
30	private $iCurrentPosition;
31	private $oParserSettings;
32	private $sCharset;
33	private $iLength;
34	private $blockRules;
35	private $aSizeUnits;
36
37	public function __construct($sText, Settings $oParserSettings = null) {
38		$this->iCurrentPosition = 0;
39		if ($oParserSettings === null) {
40			$oParserSettings = Settings::create();
41		}
42		$this->oParserSettings = $oParserSettings;
43		if ($this->oParserSettings->bMultibyteSupport) {
44			$this->aText = preg_split('//u', $sText, null, PREG_SPLIT_NO_EMPTY);
45		} else {
46			if($sText === '') {
47				$this->aText = array();
48			} else {
49				$this->aText = str_split($sText);
50			}
51		}
52		$this->blockRules = explode('/', AtRule::BLOCK_RULES);
53
54		foreach (explode('/', Size::ABSOLUTE_SIZE_UNITS.'/'.Size::RELATIVE_SIZE_UNITS.'/'.Size::NON_SIZE_UNITS) as $val) {
55			$iSize = strlen($val);
56			if(!isset($this->aSizeUnits[$iSize])) {
57				$this->aSizeUnits[$iSize] = array();
58			}
59			$this->aSizeUnits[$iSize][strtolower($val)] = $val;
60		}
61		ksort($this->aSizeUnits, SORT_NUMERIC);
62	}
63
64	public function setCharset($sCharset) {
65		$this->sCharset = $sCharset;
66		$this->iLength = count($this->aText);
67	}
68
	/**
	 * Returns the charset last passed to setCharset(): the default from the
	 * settings once parse() has started, or the one from an @charset rule.
	 */
	public function getCharset() {
		return $this->sCharset;
	}
72
73	public function parse() {
74		$this->setCharset($this->oParserSettings->sDefaultCharset);
75		$oResult = new Document();
76		$this->parseDocument($oResult);
77		return $oResult;
78	}
79
80	private function parseDocument(Document $oDocument) {
81		$this->consumeWhiteSpace();
82		$this->parseList($oDocument, true);
83	}
84
85	private function parseList(CSSList $oList, $bIsRoot = false) {
86		while (!$this->isEnd()) {
87			if ($this->comes('@')) {
88				$oList->append($this->parseAtRule());
89			} else if ($this->comes('}')) {
90				$this->consume('}');
91				if ($bIsRoot) {
92					throw new \Exception("Unopened {");
93				} else {
94					return;
95				}
96			} else {
97				if($this->oParserSettings->bLenientParsing) {
98					try {
99						$oList->append($this->parseSelector());
100					} catch (UnexpectedTokenException $e) {}
101				} else {
102					$oList->append($this->parseSelector());
103				}
104			}
105			$this->consumeWhiteSpace();
106		}
107		if (!$bIsRoot) {
108			throw new \Exception("Unexpected end of document");
109		}
110	}
111
112	private function parseAtRule() {
113		$this->consume('@');
114		$sIdentifier = $this->parseIdentifier();
115		$this->consumeWhiteSpace();
116		if ($sIdentifier === 'import') {
117			$oLocation = $this->parseURLValue();
118			$this->consumeWhiteSpace();
119			$sMediaQuery = null;
120			if (!$this->comes(';')) {
121				$sMediaQuery = $this->consumeUntil(';');
122			}
123			$this->consume(';');
124			return new Import($oLocation, $sMediaQuery);
125		} else if ($sIdentifier === 'charset') {
126			$sCharset = $this->parseStringValue();
127			$this->consumeWhiteSpace();
128			$this->consume(';');
129			$this->setCharset($sCharset->getString());
130			return new Charset($sCharset);
131		} else if ($this->identifierIs($sIdentifier, 'keyframes')) {
132			$oResult = new KeyFrame();
133			$oResult->setVendorKeyFrame($sIdentifier);
134			$oResult->setAnimationName(trim($this->consumeUntil('{', false, true)));
135			$this->consumeWhiteSpace();
136			$this->parseList($oResult);
137			return $oResult;
138		} else if ($sIdentifier === 'namespace') {
139			$sPrefix = null;
140			$mUrl = $this->parsePrimitiveValue();
141			if (!$this->comes(';')) {
142				$sPrefix = $mUrl;
143				$mUrl = $this->parsePrimitiveValue();
144			}
145			$this->consume(';');
146			if ($sPrefix !== null && !is_string($sPrefix)) {
147				throw new \Exception('Wrong namespace prefix '.$sPrefix);
148			}
149			if (!($mUrl instanceof CSSString || $mUrl instanceof URL)) {
150				throw new \Exception('Wrong namespace url of invalid type '.$mUrl);
151			}
152			return new CSSNamespace($mUrl, $sPrefix);
153		} else {
154			//Unknown other at rule (font-face or such)
155			$sArgs = trim($this->consumeUntil('{', false, true));
156			$this->consumeWhiteSpace();
157			$bUseRuleSet = true;
158			foreach($this->blockRules as $sBlockRuleName) {
159				if($this->identifierIs($sIdentifier, $sBlockRuleName)) {
160					$bUseRuleSet = false;
161					break;
162				}
163			}
164			if($bUseRuleSet) {
165				$oAtRule = new AtRuleSet($sIdentifier, $sArgs);
166				$this->parseRuleSet($oAtRule);
167			} else {
168				$oAtRule = new AtRuleBlockList($sIdentifier, $sArgs);
169				$this->parseList($oAtRule);
170			}
171			return $oAtRule;
172		}
173	}
174
175	private function parseIdentifier($bAllowFunctions = true, $bIgnoreCase = true) {
176		$sResult = $this->parseCharacter(true);
177		if ($sResult === null) {
178			throw new UnexpectedTokenException($sResult, $this->peek(5), 'identifier');
179		}
180		$sCharacter = null;
181		while (($sCharacter = $this->parseCharacter(true)) !== null) {
182			$sResult .= $sCharacter;
183		}
184		if ($bIgnoreCase) {
185			$sResult = $this->strtolower($sResult);
186		}
187		if ($bAllowFunctions && $this->comes('(')) {
188			$this->consume('(');
189			$aArguments = $this->parseValue(array('=', ' ', ','));
190			$sResult = new CSSFunction($sResult, $aArguments);
191			$this->consume(')');
192		}
193		return $sResult;
194	}
195
196	private function parseStringValue() {
197		$sBegin = $this->peek();
198		$sQuote = null;
199		if ($sBegin === "'") {
200			$sQuote = "'";
201		} else if ($sBegin === '"') {
202			$sQuote = '"';
203		}
204		if ($sQuote !== null) {
205			$this->consume($sQuote);
206		}
207		$sResult = "";
208		$sContent = null;
209		if ($sQuote === null) {
210			//Unquoted strings end in whitespace or with braces, brackets, parentheses
211			while (!preg_match('/[\\s{}()<>\\[\\]]/isu', $this->peek())) {
212				$sResult .= $this->parseCharacter(false);
213			}
214		} else {
215			while (!$this->comes($sQuote)) {
216				$sContent = $this->parseCharacter(false);
217				if ($sContent === null) {
218					throw new \Exception("Non-well-formed quoted string {$this->peek(3)}");
219				}
220				$sResult .= $sContent;
221			}
222			$this->consume($sQuote);
223		}
224		return new CSSString($sResult);
225	}
226
227	private function parseCharacter($bIsForIdentifier) {
228		if ($this->peek() === '\\') {
229			$this->consume('\\');
230			if ($this->comes('\n') || $this->comes('\r')) {
231				return '';
232			}
233			if (preg_match('/[0-9a-fA-F]/Su', $this->peek()) === 0) {
234				return $this->consume(1);
235			}
236			$sUnicode = $this->consumeExpression('/^[0-9a-fA-F]{1,6}/u');
237			if ($this->strlen($sUnicode) < 6) {
238				//Consume whitespace after incomplete unicode escape
239				if (preg_match('/\\s/isSu', $this->peek())) {
240					if ($this->comes('\r\n')) {
241						$this->consume(2);
242					} else {
243						$this->consume(1);
244					}
245				}
246			}
247			$iUnicode = intval($sUnicode, 16);
248			$sUtf32 = "";
249			for ($i = 0; $i < 4; ++$i) {
250				$sUtf32 .= chr($iUnicode & 0xff);
251				$iUnicode = $iUnicode >> 8;
252			}
253			return iconv('utf-32le', $this->sCharset, $sUtf32);
254		}
255		if ($bIsForIdentifier) {
256			$peek = ord($this->peek());
257			// Ranges: a-z A-Z 0-9 - _
258			if (($peek >= 97 && $peek <= 122) ||
259				($peek >= 65 && $peek <= 90) ||
260				($peek >= 48 && $peek <= 57) ||
261				($peek === 45) ||
262				($peek === 95) ||
263				($peek > 0xa1)) {
264				return $this->consume(1);
265			}
266		} else {
267			return $this->consume(1);
268		}
269		return null;
270	}
271
272	private function parseSelector() {
273		$oResult = new DeclarationBlock();
274		$oResult->setSelector($this->consumeUntil('{', false, true));
275		$this->consumeWhiteSpace();
276		$this->parseRuleSet($oResult);
277		return $oResult;
278	}
279
	/**
	 * Parses rules into $oRuleSet until the closing "}" and consumes that "}".
	 *
	 * In lenient mode a rule that fails with an UnexpectedTokenException is skipped:
	 * the input is discarded up to the next line break, ";" or "}" and parsing resumes.
	 * If the input ends during that recovery, the method returns without consuming a "}".
	 *
	 * @param object $oRuleSet receiver of the parsed rules; must provide addRule()
	 * @throws UnexpectedTokenException in strict mode, when a rule cannot be parsed
	 */
	private function parseRuleSet($oRuleSet) {
		// Skip stray semicolons before the first rule.
		while ($this->comes(';')) {
			$this->consume(';');
			$this->consumeWhiteSpace();
		}
		while (!$this->comes('}')) {
			$oRule = null;
			if($this->oParserSettings->bLenientParsing) {
				try {
					$oRule = $this->parseRule();
				} catch (UnexpectedTokenException $e) {
					try {
						// Discard the broken rule; the terminator is included in the returned text.
						$sConsume = $this->consumeUntil(array("\n", ";", '}'), true);
						// We need to “unfind” the matches to the end of the ruleSet as this will be matched later
						if($this->streql(substr($sConsume, -1), '}')) {
							--$this->iCurrentPosition;
						} else {
							$this->consumeWhiteSpace();
							while ($this->comes(';')) {
								$this->consume(';');
							}
						}
					} catch (UnexpectedTokenException $e) {
						// We’ve reached the end of the document. Just close the RuleSet.
						return;
					}
				}
			} else {
				$oRule = $this->parseRule();
			}
			// $oRule is still null when a broken rule was skipped above.
			if($oRule) {
				$oRuleSet->addRule($oRule);
			}
			$this->consumeWhiteSpace();
		}
		$this->consume('}');
	}
317
318	private function parseRule() {
319		$oRule = new Rule($this->parseIdentifier());
320		$this->consumeWhiteSpace();
321		$this->consume(':');
322		$oValue = $this->parseValue(self::listDelimiterForRule($oRule->getRule()));
323		$oRule->setValue($oValue);
324		if ($this->comes('!')) {
325			$this->consume('!');
326			$this->consumeWhiteSpace();
327			$this->consume('important');
328			$oRule->setIsImportant(true);
329		}
330		while ($this->comes(';')) {
331			$this->consume(';');
332			$this->consumeWhiteSpace();
333		}
334		return $oRule;
335	}
336
337	private function parseValue($aListDelimiters) {
338		$aStack = array();
339		$this->consumeWhiteSpace();
340		//Build a list of delimiters and parsed values
341		while (!($this->comes('}') || $this->comes(';') || $this->comes('!') || $this->comes(')'))) {
342			if (count($aStack) > 0) {
343				$bFoundDelimiter = false;
344				foreach ($aListDelimiters as $sDelimiter) {
345					if ($this->comes($sDelimiter)) {
346						array_push($aStack, $this->consume($sDelimiter));
347						$this->consumeWhiteSpace();
348						$bFoundDelimiter = true;
349						break;
350					}
351				}
352				if (!$bFoundDelimiter) {
353					//Whitespace was the list delimiter
354					array_push($aStack, ' ');
355				}
356			}
357			array_push($aStack, $this->parsePrimitiveValue());
358			$this->consumeWhiteSpace();
359		}
360		//Convert the list to list objects
361		foreach ($aListDelimiters as $sDelimiter) {
362			if (count($aStack) === 1) {
363				return $aStack[0];
364			}
365			$iStartPosition = null;
366			while (($iStartPosition = array_search($sDelimiter, $aStack, true)) !== false) {
367				$iLength = 2; //Number of elements to be joined
368				for ($i = $iStartPosition + 2; $i < count($aStack); $i+=2, ++$iLength) {
369					if ($sDelimiter !== $aStack[$i]) {
370						break;
371					}
372				}
373				$oList = new RuleValueList($sDelimiter);
374				for ($i = $iStartPosition - 1; $i - $iStartPosition + 1 < $iLength * 2; $i+=2) {
375					$oList->addListComponent($aStack[$i]);
376				}
377				array_splice($aStack, $iStartPosition - 1, $iLength * 2 - 1, array($oList));
378			}
379		}
380		return $aStack[0];
381	}
382
383	private static function listDelimiterForRule($sRule) {
384		if (preg_match('/^font($|-)/', $sRule)) {
385			return array(',', '/', ' ');
386		}
387		return array(',', ' ', '/');
388	}
389
390	private function parsePrimitiveValue() {
391		$oValue = null;
392		$this->consumeWhiteSpace();
393		if (is_numeric($this->peek()) || ($this->comes('-.') && is_numeric($this->peek(1, 2))) || (($this->comes('-') || $this->comes('.')) && is_numeric($this->peek(1, 1)))) {
394			$oValue = $this->parseNumericValue();
395		} else if ($this->comes('#') || $this->comes('rgb', true) || $this->comes('hsl', true)) {
396			$oValue = $this->parseColorValue();
397		} else if ($this->comes('url', true)) {
398			$oValue = $this->parseURLValue();
399		} else if ($this->comes("'") || $this->comes('"')) {
400			$oValue = $this->parseStringValue();
401		} else {
402			$oValue = $this->parseIdentifier(true, false);
403		}
404		$this->consumeWhiteSpace();
405		return $oValue;
406	}
407
408	private function parseNumericValue($bForColor = false) {
409		$sSize = '';
410		if ($this->comes('-')) {
411			$sSize .= $this->consume('-');
412		}
413		while (is_numeric($this->peek()) || $this->comes('.')) {
414			if ($this->comes('.')) {
415				$sSize .= $this->consume('.');
416			} else {
417				$sSize .= $this->consume(1);
418			}
419		}
420
421		$sUnit = null;
422		foreach ($this->aSizeUnits as $iLength => &$aValues) {
423			$sKey = strtolower($this->peek($iLength));
424			if(array_key_exists($sKey, $aValues)) {
425				if (($sUnit = $aValues[$sKey]) !== null) {
426					$this->consume($iLength);
427					break;
428				}
429			}
430		}
431		return new Size(floatval($sSize), $sUnit, $bForColor);
432	}
433
434	private function parseColorValue() {
435		$aColor = array();
436		if ($this->comes('#')) {
437			$this->consume('#');
438			$sValue = $this->parseIdentifier(false);
439			if ($this->strlen($sValue) === 3) {
440				$sValue = $sValue[0] . $sValue[0] . $sValue[1] . $sValue[1] . $sValue[2] . $sValue[2];
441			}
442			$aColor = array('r' => new Size(intval($sValue[0] . $sValue[1], 16), null, true), 'g' => new Size(intval($sValue[2] . $sValue[3], 16), null, true), 'b' => new Size(intval($sValue[4] . $sValue[5], 16), null, true));
443		} else {
444			$sColorMode = $this->parseIdentifier(false);
445			$this->consumeWhiteSpace();
446			$this->consume('(');
447			$iLength = $this->strlen($sColorMode);
448			for ($i = 0; $i < $iLength; ++$i) {
449				$this->consumeWhiteSpace();
450				$aColor[$sColorMode[$i]] = $this->parseNumericValue(true);
451				$this->consumeWhiteSpace();
452				if ($i < ($iLength - 1)) {
453					$this->consume(',');
454				}
455			}
456			$this->consume(')');
457		}
458		return new Color($aColor);
459	}
460
461	private function parseURLValue() {
462		$bUseUrl = $this->comes('url', true);
463		if ($bUseUrl) {
464			$this->consume('url');
465			$this->consumeWhiteSpace();
466			$this->consume('(');
467		}
468		$this->consumeWhiteSpace();
469		$oResult = new URL($this->parseStringValue());
470		if ($bUseUrl) {
471			$this->consumeWhiteSpace();
472			$this->consume(')');
473		}
474		return $oResult;
475	}
476
477	/**
478	 * Tests an identifier for a given value. Since identifiers are all keywords, they can be vendor-prefixed. We need to check for these versions too.
479	 */
480	private function identifierIs($sIdentifier, $sMatch) {
481		return (strcasecmp($sIdentifier, $sMatch) === 0)
482			?: preg_match("/^(-\\w+-)?$sMatch$/i", $sIdentifier) === 1;
483	}
484
485	private function comes($sString, $bCaseInsensitive = false) {
486		$sPeek = $this->peek(strlen($sString));
487		return ($sPeek == '')
488			? false
489			: $this->streql($sPeek, $sString, $bCaseInsensitive);
490	}
491
492	private function peek($iLength = 1, $iOffset = 0) {
493		$iOffset += $this->iCurrentPosition;
494		if ($iOffset >= $this->iLength) {
495			return '';
496		}
497		$out = $this->substr($iOffset, $iLength);
498		return $out;
499	}
500
501	private function consume($mValue = 1) {
502		if (is_string($mValue)) {
503			$iLength = $this->strlen($mValue);
504			if (!$this->streql($this->substr($this->iCurrentPosition, $iLength), $mValue)) {
505				throw new UnexpectedTokenException($mValue, $this->peek(max($iLength, 5)));
506			}
507			$this->iCurrentPosition += $this->strlen($mValue);
508			return $mValue;
509		} else {
510			if ($this->iCurrentPosition + $mValue > $this->iLength) {
511				throw new UnexpectedTokenException($mValue, $this->peek(5), 'count');
512			}
513			$sResult = $this->substr($this->iCurrentPosition, $mValue);
514			$this->iCurrentPosition += $mValue;
515			return $sResult;
516		}
517	}
518
519	private function consumeExpression($mExpression) {
520		$aMatches = null;
521		if (preg_match($mExpression, $this->inputLeft(), $aMatches, PREG_OFFSET_CAPTURE) === 1) {
522			return $this->consume($aMatches[0][0]);
523		}
524		throw new UnexpectedTokenException($mExpression, $this->peek(5), 'expression');
525	}
526
527	private function consumeWhiteSpace() {
528		do {
529			while (preg_match('/\\s/isSu', $this->peek()) === 1) {
530				$this->consume(1);
531			}
532			if($this->oParserSettings->bLenientParsing) {
533				try {
534					$bHasComment = $this->consumeComment();
535				} catch(UnexpectedTokenException $e) {
536					// When we can’t find the end of a comment, we assume the document is finished.
537					$this->iCurrentPosition = $this->iLength;
538					return;
539				}
540			} else {
541				$bHasComment = $this->consumeComment();
542			}
543		} while($bHasComment);
544	}
545
546	private function consumeComment() {
547		if ($this->comes('/*')) {
548			$this->consume(1);
549			while ($this->consume(1) !== '') {
550				if ($this->comes('*/')) {
551					$this->consume(2);
552					return true;
553				}
554			}
555		}
556		return false;
557	}
558
559	private function isEnd() {
560		return $this->iCurrentPosition >= $this->iLength;
561	}
562
563	private function consumeUntil($aEnd, $bIncludeEnd = false, $consumeEnd = false) {
564		$aEnd = is_array($aEnd) ? $aEnd : array($aEnd);
565		$out = '';
566		$start = $this->iCurrentPosition;
567
568		while (($char = $this->consume(1)) !== '') {
569			$this->consumeComment();
570			if (in_array($char, $aEnd)) {
571				if ($bIncludeEnd) {
572					$out .= $char;
573				} elseif (!$consumeEnd) {
574					$this->iCurrentPosition -= $this->strlen($char);
575				}
576				return $out;
577			}
578			$out .= $char;
579		}
580
581		$this->iCurrentPosition = $start;
582		throw new UnexpectedTokenException('One of ("'.implode('","', $aEnd).'")', $this->peek(5), 'search');
583	}
584
585	private function inputLeft() {
586		return $this->substr($this->iCurrentPosition, -1);
587	}
588
589	private function substr($iStart, $iLength) {
590		if ($iLength < 0) {
591			$iLength = $this->iLength - $iStart + $iLength;
592		}
593		if ($iStart + $iLength > $this->iLength) {
594			$iLength = $this->iLength - $iStart;
595		}
596		$out = '';
597		while ($iLength > 0) {
598			$out .= $this->aText[$iStart];
599			$iStart++;
600			$iLength--;
601		}
602		return $out;
603	}
604
605	private function strlen($sString) {
606		if ($this->oParserSettings->bMultibyteSupport) {
607			return mb_strlen($sString, $this->sCharset);
608		} else {
609			return strlen($sString);
610		}
611	}
612
613	private function streql($sString1, $sString2, $bCaseInsensitive = true) {
614		if($bCaseInsensitive) {
615			return $this->strtolower($sString1) === $this->strtolower($sString2);
616		} else {
617			return $sString1 === $sString2;
618		}
619	}
620
621	private function strtolower($sString) {
622		if ($this->oParserSettings->bMultibyteSupport) {
623			return mb_strtolower($sString, $this->sCharset);
624		} else {
625			return strtolower($sString);
626		}
627	}
628
629	private function strpos($sString, $sNeedle, $iOffset) {
630		if ($this->oParserSettings->bMultibyteSupport) {
631			return mb_strpos($sString, $sNeedle, $iOffset, $this->sCharset);
632		} else {
633			return strpos($sString, $sNeedle, $iOffset);
634		}
635	}
636
637}
638