1<?php
2
3/**
4* @package   s9e\TextFormatter
5* @copyright Copyright (c) 2010-2021 The s9e authors
6* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7*/
8namespace s9e\TextFormatter\Plugins\Litedown\Parser;
9
class ParsedText
{
	/**
	* @var bool Whether to decode HTML entities when decoding text
	*/
	public $decodeHtmlEntities = false;

	/**
	* @var bool Whether text contains escape characters
	*/
	protected $hasEscapedChars = false;

	/**
	* @var bool Whether text contains link references
	*/
	public $hasReferences = false;

	/**
	* @var array Array of [label => link info]
	*/
	public $linkReferences = [];

	/**
	* @var string Text being parsed
	*/
	protected $text;

	/**
	* Store the text to be parsed, with backslash-escaped literals replaced by placeholders
	*
	* Each escaped literal is replaced by a 2-byte sequence made of 0x1B followed by one of the
	* characters 0-9 or A-E. The sequence "\n\n\x17" is appended to the text.
	*
	* @param string $text Original text
	*/
	public function __construct($text)
	{
		// The cheap strpos() check avoids running the regexp on text that contains no backslash
		if (strpos($text, '\\') !== false && preg_match('/\\\\[!"\'()*<>[\\\\\\]^_`~]/', $text))
		{
			$this->hasEscapedChars = true;

			// Encode escaped literals that have a special meaning otherwise, so that we don't have
			// to take them into account in regexps
			$text = strtr(
				$text,
				[
					'\\!' => "\x1B0", '\\"'  => "\x1B1", "\\'" => "\x1B2", '\\(' => "\x1B3",
					'\\)' => "\x1B4", '\\*'  => "\x1B5", '\\<' => "\x1B6", '\\>' => "\x1B7",
					'\\[' => "\x1B8", '\\\\' => "\x1B9", '\\]' => "\x1BA", '\\^' => "\x1BB",
					'\\_' => "\x1BC", '\\`'  => "\x1BD", '\\~' => "\x1BE"
				]
			);
		}

		// We append a couple of lines and a non-whitespace character at the end of the text in
		// order to trigger the closure of all open blocks such as quotes and lists
		$this->text = $text . "\n\n\x17";
	}

	/**
	* Return the text in its current, encoded state
	*
	* @return string
	*/
	public function __toString()
	{
		return $this->text;
	}

	/**
	* Return the character at given position
	*
	* @param  int    $pos
	* @return string      Single byte at given position, or an empty string if there is none
	*/
	public function charAt($pos)
	{
		// isset() keeps an out-of-range read from raising "Uninitialized string offset"
		return (isset($this->text[$pos])) ? $this->text[$pos] : '';
	}

	/**
	* Decode a chunk of encoded text to be used as an attribute value
	*
	* In order: decodes HTML entities if $decodeHtmlEntities is set, removes 0x1A characters,
	* then replaces the placeholders created in the constructor with the literal they stand for
	* (without its backslash)
	*
	* @param  string $str Encoded text
	* @return string      Decoded text
	*/
	public function decode($str)
	{
		if ($this->decodeHtmlEntities && strpos($str, '&') !== false)
		{
			$str = html_entity_decode($str, ENT_QUOTES, 'UTF-8');
		}
		$str = str_replace("\x1A", '', $str);

		// Placeholders can only exist if the constructor found escaped literals
		if ($this->hasEscapedChars)
		{
			$str = strtr(
				$str,
				[
					"\x1B0" => '!', "\x1B1" => '"',  "\x1B2" => "'", "\x1B3" => '(',
					"\x1B4" => ')', "\x1B5" => '*',  "\x1B6" => '<', "\x1B7" => '>',
					"\x1B8" => '[', "\x1B9" => '\\', "\x1BA" => ']', "\x1BB" => '^',
					"\x1BC" => '_', "\x1BD" => '`',  "\x1BE" => '~'
				]
			);
		}

		return $str;
	}

	/**
	* Find the first occurrence of given substring starting at given position
	*
	* @param  string    $str
	* @param  int       $pos
	* @return int|false      Position of the substring, or FALSE if it was not found
	*/
	public function indexOf($str, $pos = 0)
	{
		return strpos($this->text, $str, $pos);
	}

	/**
	* Test whether given position is preceded by whitespace
	*
	* @param  int  $pos
	* @return bool
	*/
	public function isAfterWhitespace($pos)
	{
		return ($pos > 0 && $this->isWhitespace($this->charAt($pos - 1)));
	}

	/**
	* Test whether given character is alphanumeric
	*
	* @param  string $chr
	* @return bool
	*/
	public function isAlnum($chr)
	{
		// The haystack starts with a space so that no alphanumeric character sits at offset 0,
		// which lets a single "> 0" comparison reject both FALSE and a match on the space itself.
		// The empty string is ruled out first because strpos() warns about an empty needle before
		// PHP 8 and matches it at offset 0 afterwards
		return ($chr !== '' && strpos(' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', $chr) > 0);
	}

	/**
	* Test whether given position is followed by whitespace
	*
	* @param  int  $pos
	* @return bool      FALSE if there is no character after given position
	*/
	public function isBeforeWhitespace($pos)
	{
		return $this->isWhitespace($this->charAt($pos + 1));
	}

	/**
	* Test whether a length of text is surrounded by alphanumeric characters
	*
	* @param  int  $pos Start of the text
	* @param  int  $len Length of the text
	* @return bool      FALSE if the text starts at position 0 or runs to the end of the text
	*/
	public function isSurroundedByAlnum($pos, $len)
	{
		return ($pos > 0 && $this->isAlnum($this->charAt($pos - 1)) && $this->isAlnum($this->charAt($pos + $len)));
	}

	/**
	* Test whether given character is an ASCII whitespace character
	*
	* NOTE: newlines are normalized to LF before parsing so we don't have to check for CR
	*
	* @param  string $chr
	* @return bool
	*/
	public function isWhitespace($chr)
	{
		// Since PHP 8, strpos() matches an empty needle at offset 0. Without this guard an empty
		// string would be reported as whitespace
		return ($chr !== '' && strpos(" \n\t", $chr) !== false);
	}

	/**
	* Mark the boundary of a block in the original text
	*
	* Replaces the byte at given position with 0x17
	*
	* @param  int  $pos
	* @return void
	*/
	public function markBoundary($pos)
	{
		$this->text[$pos] = "\x17";
	}

	/**
	* Overwrite part of the text with substitution characters ^Z (0x1A)
	*
	* @param  int  $pos Start of the range
	* @param  int  $len Length of text to overwrite
	* @return void
	*/
	public function overwrite($pos, $len)
	{
		if ($len > 0)
		{
			// The replacement is as long as the range it replaces, so positions after it are unchanged
			$this->text = substr_replace($this->text, str_repeat("\x1A", $len), $pos, $len);
		}
	}
}