1<?php
2namespace Sabberworm\CSS\Parsing;
3
4use Sabberworm\CSS\Comment\Comment;
5use Sabberworm\CSS\Parsing\UnexpectedTokenException;
6use Sabberworm\CSS\Settings;
7
/**
 * Character-level cursor over the CSS text being parsed.
 *
 * The input is split once into an array of characters (see strsplit()), so positions and
 * lengths are counted in characters rather than bytes when multibyte support is enabled.
 * consume() advances the cursor and counts the line feeds it passes, which keeps
 * currentLine() usable for error messages.
 */
class ParserState {
	/** @var Settings parser configuration (multibyte support, lenient parsing, default charset) */
	private $oParserSettings;

	/** @var string the raw input text as handed to the constructor */
	private $sText;

	/** @var string[] the input split into single characters according to $sCharset */
	private $aText;
	/** @var int index into $aText of the next character to be read */
	private $iCurrentPosition;
	/** @var string name of the charset currently used to interpret $sText */
	private $sCharset;
	/** @var int number of characters in $aText */
	private $iLength;
	/** @var int line number of the cursor */
	private $iLineNo;

	/**
	 * @param string $sText the CSS text to parse
	 * @param Settings $oParserSettings parser configuration
	 * @param int $iLineNo line number to assign to the first line of $sText
	 */
	public function __construct($sText, Settings $oParserSettings, $iLineNo = 1) {
		$this->oParserSettings = $oParserSettings;
		$this->sText = $sText;
		$this->iCurrentPosition = 0;
		$this->iLineNo = $iLineNo;
		$this->setCharset($this->oParserSettings->sDefaultCharset);
	}

	/**
	 * Switches the charset and re-splits the whole input into characters accordingly.
	 * The cursor position is left untouched.
	 *
	 * @param string $sCharset
	 */
	public function setCharset($sCharset) {
		$this->sCharset = $sCharset;
		$this->aText = $this->strsplit($this->sText);
		$this->iLength = count($this->aText);
	}

	/**
	 * @return string the charset currently used to interpret the input
	 */
	public function getCharset() {
		// The charset lives on this object; there is no helper object to delegate to.
		return $this->sCharset;
	}

	/**
	 * @return int the line number of the cursor
	 */
	public function currentLine() {
		return $this->iLineNo;
	}

	/**
	 * @return Settings the settings this state was created with
	 */
	public function getSettings() {
		return $this->oParserSettings;
	}

	/**
	 * Consumes an identifier: one or more characters accepted by parseCharacter(true).
	 *
	 * @param bool $bIgnoreCase whether to lower-case the result
	 * @return string
	 * @throws UnexpectedTokenException if no identifier character is found at the cursor
	 */
	public function parseIdentifier($bIgnoreCase = true) {
		$sResult = $this->parseCharacter(true);
		if ($sResult === null) {
			throw new UnexpectedTokenException($sResult, $this->peek(5), 'identifier', $this->iLineNo);
		}
		$sCharacter = null;
		while (($sCharacter = $this->parseCharacter(true)) !== null) {
			$sResult .= $sCharacter;
		}
		if ($bIgnoreCase) {
			$sResult = $this->strtolower($sResult);
		}
		return $sResult;
	}

	/**
	 * Consumes a single (possibly backslash-escaped) character.
	 *
	 * @param bool $bIsForIdentifier restrict unescaped characters to a-z, A-Z, 0-9, "-", "_"
	 *        and characters whose first byte is above 0xA1
	 * @return string|null the decoded character; '' when a backslash directly precedes a line
	 *         break (only the backslash is consumed); null when $bIsForIdentifier is set and
	 *         the next character cannot be part of an identifier
	 * @throws UnexpectedTokenException from consume() when the input ends where a character is required
	 */
	public function parseCharacter($bIsForIdentifier) {
		if ($this->peek() === '\\') {
			// These two needles are meant literally: a backslash followed by the digit 0 or 9.
			if ($bIsForIdentifier && $this->oParserSettings->bLenientParsing && ($this->comes('\\0') || $this->comes('\\9'))) {
				// Non-strings can contain \0 or \9 which is an IE hack supported in lenient parsing.
				return null;
			}
			$this->consume('\\');
			// Double quotes are required here: '\n' in single quotes is a backslash plus the
			// letter n, which made an escaped backslash followed by "n" or "r" lose its backslash.
			if ($this->comes("\n") || $this->comes("\r")) {
				return '';
			}
			if (preg_match('/[0-9a-fA-F]/Su', $this->peek()) === 0) {
				// Not a hex digit: the escape stands for the character itself.
				return $this->consume(1);
			}
			$sUnicode = $this->consumeExpression('/^[0-9a-fA-F]{1,6}/u', 6);
			if ($this->strlen($sUnicode) < 6) {
				// Consume whitespace after incomplete unicode escape
				if (preg_match('/\\s/isSu', $this->peek())) {
					// A CR LF pair counts as one whitespace; with single quotes this branch was dead.
					if ($this->comes("\r\n")) {
						$this->consume(2);
					} else {
						$this->consume(1);
					}
				}
			}
			// Build the code point as four little-endian bytes and let iconv transcode it.
			$iUnicode = intval($sUnicode, 16);
			$sUtf32 = "";
			for ($i = 0; $i < 4; ++$i) {
				$sUtf32 .= chr($iUnicode & 0xff);
				$iUnicode = $iUnicode >> 8;
			}
			return iconv('utf-32le', $this->sCharset, $sUtf32);
		}
		if ($bIsForIdentifier) {
			// ord() only looks at the first byte of the character returned by peek().
			$peek = ord($this->peek());
			// Ranges: a-z A-Z 0-9 - _
			if (($peek >= 97 && $peek <= 122) ||
				($peek >= 65 && $peek <= 90) ||
				($peek >= 48 && $peek <= 57) ||
				($peek === 45) ||
				($peek === 95) ||
				($peek > 0xa1)) {
				return $this->consume(1);
			}
		} else {
			return $this->consume(1);
		}
		return null;
	}

	/**
	 * Skips whitespace and comments at the cursor.
	 *
	 * @return Comment[] the comments that were skipped, in source order
	 * @throws UnexpectedTokenException in strict mode, when a comment is not terminated
	 */
	public function consumeWhiteSpace() {
		$comments = array();
		do {
			while (preg_match('/\\s/isSu', $this->peek()) === 1) {
				$this->consume(1);
			}
			if($this->oParserSettings->bLenientParsing) {
				try {
					$oComment = $this->consumeComment();
				} catch(UnexpectedTokenException $e) {
					// When we can’t find the end of a comment, we assume the document is finished.
					$this->iCurrentPosition = $this->iLength;
					// Still hand back what was collected so callers always receive an array.
					return $comments;
				}
			} else {
				$oComment = $this->consumeComment();
			}
			if ($oComment !== false) {
				$comments[] = $oComment;
			}
		} while($oComment !== false);
		return $comments;
	}

	/**
	 * Tells whether the input at the cursor starts with the given string, without consuming it.
	 *
	 * @param string $sString
	 * @param bool $bCaseInsensitive
	 * @return bool false at the end of the input
	 */
	public function comes($sString, $bCaseInsensitive = false) {
		// peek() counts characters, so the needle has to be measured the same way;
		// a byte count would over-read for multibyte needles and never match.
		$sPeek = $this->peek($this->strlen($sString));
		if ($sPeek === '') {
			return false;
		}
		return $this->streql($sPeek, $sString, $bCaseInsensitive);
	}

	/**
	 * Returns upcoming characters without moving the cursor.
	 *
	 * @param int $iLength number of characters to return (fewer if the input ends earlier)
	 * @param int $iOffset number of characters to skip ahead of the cursor first
	 * @return string '' when the requested start lies at or beyond the end of the input
	 */
	public function peek($iLength = 1, $iOffset = 0) {
		$iOffset += $this->iCurrentPosition;
		if ($iOffset >= $this->iLength) {
			return '';
		}
		return $this->substr($iOffset, $iLength);
	}

	/**
	 * Advances the cursor, either over an expected literal or over a number of characters.
	 *
	 * @param string|int $mValue a string that must come next (compared via streql() with its
	 *        default, i.e. case-insensitively), or the number of characters to consume
	 * @return string the given literal, or the characters that were consumed
	 * @throws UnexpectedTokenException if the literal does not come next, or if fewer than
	 *         $mValue characters are left
	 */
	public function consume($mValue = 1) {
		if (is_string($mValue)) {
			$iLineCount = substr_count($mValue, "\n");
			$iLength = $this->strlen($mValue);
			if (!$this->streql($this->substr($this->iCurrentPosition, $iLength), $mValue)) {
				// Name the match type explicitly; otherwise the line number ends up in its slot.
				throw new UnexpectedTokenException($mValue, $this->peek(max($iLength, 5)), 'literal', $this->iLineNo);
			}
			$this->iLineNo += $iLineCount;
			$this->iCurrentPosition += $iLength;
			return $mValue;
		} else {
			if ($this->iCurrentPosition + $mValue > $this->iLength) {
				throw new UnexpectedTokenException($mValue, $this->peek(5), 'count', $this->iLineNo);
			}
			$sResult = $this->substr($this->iCurrentPosition, $mValue);
			$iLineCount = substr_count($sResult, "\n");
			$this->iLineNo += $iLineCount;
			$this->iCurrentPosition += $mValue;
			return $sResult;
		}
	}

	/**
	 * Consumes the text matched by a regular expression.
	 *
	 * The matched text is passed to consume() as a literal, so it has to start exactly at
	 * the cursor; patterns are therefore expected to be anchored with "^".
	 *
	 * @param string $mExpression PCRE pattern including delimiters
	 * @param int|null $iMaxLength only look at this many upcoming characters; null means all remaining input
	 * @return string the matched text
	 * @throws UnexpectedTokenException if the pattern does not match
	 */
	public function consumeExpression($mExpression, $iMaxLength = null) {
		$aMatches = null;
		$sInput = $iMaxLength !== null ? $this->peek($iMaxLength) : $this->inputLeft();
		if (preg_match($mExpression, $sInput, $aMatches, PREG_OFFSET_CAPTURE) === 1) {
			// With PREG_OFFSET_CAPTURE each match is a (text, offset) pair.
			return $this->consume($aMatches[0][0]);
		}
		throw new UnexpectedTokenException($mExpression, $this->peek(5), 'expression', $this->iLineNo);
	}

	/**
	 * Consumes one comment if one starts at the cursor.
	 *
	 * @return false|Comment false when no comment starts at the cursor
	 * @throws UnexpectedTokenException from consume() when the input ends before the comment is closed
	 */
	public function consumeComment() {
		$mComment = false;
		if ($this->comes('/*')) {
			$iLineNo = $this->iLineNo;
			// Only the slash is consumed here. The asterisk is read by the loop below, so an
			// empty comment is closed by the terminator check right after that first read.
			$this->consume(1);
			$mComment = '';
			while (($char = $this->consume(1)) !== '') {
				$mComment .= $char;
				if ($this->comes('*/')) {
					$this->consume(2);
					break;
				}
			}
		}

		if ($mComment !== false) {
			// We skip the * which was included in the comment.
			return new Comment(substr($mComment, 1), $iLineNo);
		}

		return $mComment;
	}

	/**
	 * @return bool whether the cursor has reached the end of the input
	 */
	public function isEnd() {
		return $this->iCurrentPosition >= $this->iLength;
	}

	/**
	 * Consumes characters until one of the given end characters is found.
	 *
	 * Comments encountered on the way are consumed and appended to $comments instead of
	 * being added to the result.
	 *
	 * @param string|string[] $aEnd single characters that stop the scan
	 * @param bool $bIncludeEnd append the end character to the result (it is then consumed as well)
	 * @param bool $consumeEnd when the end character is not included: whether it stays consumed
	 *        (true) or the cursor is moved back in front of it (false)
	 * @param Comment[] $comments receives the comments that were skipped
	 * @return string the consumed text
	 * @throws UnexpectedTokenException if the input ends before an end character is found.
	 *         consume(1) throws as soon as the input is exhausted, so in practice that is
	 *         the exception callers see and the cursor is then left where the scan stopped.
	 */
	public function consumeUntil($aEnd, $bIncludeEnd = false, $consumeEnd = false, array &$comments = array()) {
		$aEnd = is_array($aEnd) ? $aEnd : array($aEnd);
		$out = '';
		$start = $this->iCurrentPosition;

		while (($char = $this->consume(1)) !== '') {
			// Strict comparison: both sides are strings and numeric-looking ones must not be juggled.
			if (in_array($char, $aEnd, true)) {
				if ($bIncludeEnd) {
					$out .= $char;
				} elseif (!$consumeEnd) {
					$this->iCurrentPosition -= $this->strlen($char);
				}
				return $out;
			}
			$out .= $char;
			if ($comment = $this->consumeComment()) {
				$comments[] = $comment;
			}
		}

		$this->iCurrentPosition = $start;
		throw new UnexpectedTokenException('One of ("'.implode('","', $aEnd).'")', $this->peek(5), 'search', $this->iLineNo);
	}

	/**
	 * @return string everything from the cursor up to and including the last character of the input
	 */
	private function inputLeft() {
		// Ask for the exact remaining length; a length of -1 would cut off the final character.
		return $this->substr($this->iCurrentPosition, max(0, $this->iLength - $this->iCurrentPosition));
	}

	/**
	 * Compares two strings, optionally ignoring case.
	 *
	 * @param string $sString1
	 * @param string $sString2
	 * @param bool $bCaseInsensitive
	 * @return bool
	 */
	public function streql($sString1, $sString2, $bCaseInsensitive = true) {
		if($bCaseInsensitive) {
			return $this->strtolower($sString1) === $this->strtolower($sString2);
		} else {
			return $sString1 === $sString2;
		}
	}

	/**
	 * Moves the cursor back. The line number is not adjusted.
	 *
	 * @param int $iAmount number of characters to step back
	 */
	public function backtrack($iAmount) {
		$this->iCurrentPosition -= $iAmount;
	}

	/**
	 * @param string $sString
	 * @return int length in characters of the current charset with multibyte support, in bytes otherwise
	 */
	public function strlen($sString) {
		if ($this->oParserSettings->bMultibyteSupport) {
			return mb_strlen($sString, $this->sCharset);
		} else {
			return strlen($sString);
		}
	}

	/**
	 * Joins a range of characters of the split input.
	 *
	 * @param int $iStart index of the first character
	 * @param int $iLength number of characters; a negative value leaves out that many
	 *        characters at the end of the input; the range is clipped to the end of the input
	 * @return string
	 */
	private function substr($iStart, $iLength) {
		if ($iLength < 0) {
			$iLength = $this->iLength - $iStart + $iLength;
		}
		if ($iStart + $iLength > $this->iLength) {
			$iLength = $this->iLength - $iStart;
		}
		$sResult = '';
		while ($iLength > 0) {
			$sResult .= $this->aText[$iStart];
			$iStart++;
			$iLength--;
		}
		return $sResult;
	}

	/**
	 * @param string $sString
	 * @return string lower-cased string, charset-aware when multibyte support is enabled
	 */
	private function strtolower($sString) {
		if ($this->oParserSettings->bMultibyteSupport) {
			return mb_strtolower($sString, $this->sCharset);
		} else {
			return strtolower($sString);
		}
	}

	/**
	 * Splits a string into an array of single characters.
	 *
	 * @param string $sString
	 * @return string[] characters of the current charset with multibyte support, bytes otherwise
	 */
	private function strsplit($sString) {
		if ($this->oParserSettings->bMultibyteSupport) {
			if ($this->streql($this->sCharset, 'utf-8')) {
				// -1 means "no limit"; passing null for the limit is deprecated as of PHP 8.1.
				return preg_split('//u', $sString, -1, PREG_SPLIT_NO_EMPTY);
			} else {
				$iLength = mb_strlen($sString, $this->sCharset);
				$aResult = array();
				for ($i = 0; $i < $iLength; ++$i) {
					$aResult[] = mb_substr($sString, $i, 1, $this->sCharset);
				}
				return $aResult;
			}
		} else {
			if($sString === '') {
				// Handled separately so that an empty input yields an empty array.
				return array();
			} else {
				return str_split($sString);
			}
		}
	}

	/**
	 * @param string $sString haystack
	 * @param string $sNeedle
	 * @param int $iOffset where to start searching
	 * @return int|false position of the first occurrence, or false if there is none
	 */
	private function strpos($sString, $sNeedle, $iOffset) {
		if ($this->oParserSettings->bMultibyteSupport) {
			return mb_strpos($sString, $sNeedle, $iOffset, $this->sCharset);
		} else {
			return strpos($sString, $sNeedle, $iOffset);
		}
	}
}