1<?php
2
3/**
4* @package   s9e\TextFormatter
5* @copyright Copyright (c) 2010-2021 The s9e authors
6* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7*/
8namespace s9e\TextFormatter;
9
10use InvalidArgumentException;
11use RuntimeException;
12use s9e\TextFormatter\Parser\FilterProcessing;
13use s9e\TextFormatter\Parser\Logger;
14use s9e\TextFormatter\Parser\Tag;
15
16class Parser
17{
18	/**#@+
19	* Boolean rules bitfield
20	*/
21	const RULE_AUTO_CLOSE        = 1 << 0;
22	const RULE_AUTO_REOPEN       = 1 << 1;
23	const RULE_BREAK_PARAGRAPH   = 1 << 2;
24	const RULE_CREATE_PARAGRAPHS = 1 << 3;
25	const RULE_DISABLE_AUTO_BR   = 1 << 4;
26	const RULE_ENABLE_AUTO_BR    = 1 << 5;
27	const RULE_IGNORE_TAGS       = 1 << 6;
28	const RULE_IGNORE_TEXT       = 1 << 7;
29	const RULE_IGNORE_WHITESPACE = 1 << 8;
30	const RULE_IS_TRANSPARENT    = 1 << 9;
31	const RULE_PREVENT_BR        = 1 << 10;
32	const RULE_SUSPEND_AUTO_BR   = 1 << 11;
33	const RULE_TRIM_FIRST_LINE   = 1 << 12;
34	/**#@-*/
35
36	/**
37	* Bitwise disjunction of rules related to automatic line breaks
38	*/
39	const RULES_AUTO_LINEBREAKS = self::RULE_DISABLE_AUTO_BR | self::RULE_ENABLE_AUTO_BR | self::RULE_SUSPEND_AUTO_BR;
40
41	/**
42	* Bitwise disjunction of rules that are inherited by subcontexts
43	*/
44	const RULES_INHERITANCE = self::RULE_ENABLE_AUTO_BR;
45
46	/**
47	* All the characters that are considered whitespace
48	*/
49	const WHITESPACE = " \n\t";
50
51	/**
52	* @var array Number of open tags for each tag name
53	*/
54	protected $cntOpen;
55
56	/**
57	* @var array Number of times each tag has been used
58	*/
59	protected $cntTotal;
60
61	/**
62	* @var array Current context
63	*/
64	protected $context;
65
66	/**
67	* @var integer How hard the parser has worked on fixing bad markup so far
68	*/
69	protected $currentFixingCost;
70
71	/**
72	* @var Tag Current tag being processed
73	*/
74	protected $currentTag;
75
76	/**
77	* @var bool Whether the output contains "rich" tags, IOW any tag that is not <p> or <br/>
78	*/
79	protected $isRich;
80
81	/**
82	* @var Logger This parser's logger
83	*/
84	protected $logger;
85
86	/**
87	* @var integer How hard the parser should work on fixing bad markup
88	*/
89	public $maxFixingCost = 10000;
90
91	/**
92	* @var array Associative array of namespace prefixes in use in document (prefixes used as key)
93	*/
94	protected $namespaces;
95
96	/**
97	* @var array Stack of open tags (instances of Tag)
98	*/
99	protected $openTags;
100
101	/**
102	* @var string This parser's output
103	*/
104	protected $output;
105
106	/**
107	* @var integer Position of the cursor in the original text
108	*/
109	protected $pos;
110
111	/**
112	* @var array Array of callbacks, using plugin names as keys
113	*/
114	protected $pluginParsers = [];
115
116	/**
117	* @var array Associative array of [pluginName => pluginConfig]
118	*/
119	protected $pluginsConfig;
120
121	/**
122	* @var array Variables registered for use in filters
123	*/
124	public $registeredVars = [];
125
126	/**
127	* @var array Root context, used at the root of the document
128	*/
129	protected $rootContext;
130
131	/**
132	* @var array Tags' config
133	*/
134	protected $tagsConfig;
135
136	/**
137	* @var array Tag storage
138	*/
139	protected $tagStack;
140
141	/**
142	* @var bool Whether the tags in the stack are sorted
143	*/
144	protected $tagStackIsSorted;
145
146	/**
147	* @var string Text being parsed
148	*/
149	protected $text;
150
151	/**
152	* @var integer Length of the text being parsed
153	*/
154	protected $textLen;
155
156	/**
	* @var integer Counter incremented every time the parser is reset. Used as a canary to detect
	*              whether the parser was reset during execution
159	*/
160	protected $uid = 0;
161
162	/**
163	* @var integer Position before which we output text verbatim, without paragraphs or linebreaks
164	*/
165	protected $wsPos;
166
167	/**
168	* Constructor
169	*/
170	public function __construct(array $config)
171	{
172		$this->pluginsConfig  = $config['plugins'];
173		$this->registeredVars = $config['registeredVars'];
174		$this->rootContext    = $config['rootContext'];
175		$this->tagsConfig     = $config['tags'];
176
177		$this->__wakeup();
178	}
179
180	/**
181	* Serializer
182	*
183	* Returns the properties that need to persist through serialization.
184	*
185	* NOTE: using __sleep() is preferable to implementing Serializable because it leaves the choice
186	* of the serializer to the user (e.g. igbinary)
187	*
188	* @return array
189	*/
190	public function __sleep()
191	{
192		return ['pluginsConfig', 'registeredVars', 'rootContext', 'tagsConfig'];
193	}
194
195	/**
196	* Unserializer
197	*
198	* @return void
199	*/
200	public function __wakeup()
201	{
202		$this->logger = new Logger;
203	}
204
205	/**
206	* Reset the parser for a new parsing
207	*
208	* @param  string $text Text to be parsed
209	* @return void
210	*/
211	protected function reset($text)
212	{
213		// Reject invalid UTF-8
214		if (!preg_match('//u', $text))
215		{
216			throw new InvalidArgumentException('Invalid UTF-8 input');
217		}
218
219		// Normalize CR/CRLF to LF, remove control characters that aren't allowed in XML
220		$text = preg_replace('/\\r\\n?/', "\n", $text);
221		$text = preg_replace('/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]+/S', '', $text);
222
223		// Clear the logs
224		$this->logger->clear();
225
226		// Initialize the rest
227		$this->cntOpen           = [];
228		$this->cntTotal          = [];
229		$this->currentFixingCost = 0;
230		$this->currentTag        = null;
231		$this->isRich            = false;
232		$this->namespaces        = [];
233		$this->openTags          = [];
234		$this->output            = '';
235		$this->pos               = 0;
236		$this->tagStack          = [];
237		$this->tagStackIsSorted  = false;
238		$this->text              = $text;
239		$this->textLen           = strlen($text);
240		$this->wsPos             = 0;
241
242		// Initialize the root context
243		$this->context = $this->rootContext;
244		$this->context['inParagraph'] = false;
245
246		// Bump the UID
247		++$this->uid;
248	}
249
250	/**
251	* Set a tag's option
252	*
253	* This method ensures that the tag's config is a value and not a reference, to prevent
254	* potential side-effects. References contained *inside* the tag's config are left untouched
255	*
256	* @param  string $tagName     Tag's name
257	* @param  string $optionName  Option's name
258	* @param  mixed  $optionValue Option's value
259	* @return void
260	*/
261	protected function setTagOption($tagName, $optionName, $optionValue)
262	{
263		if (isset($this->tagsConfig[$tagName]))
264		{
265			// Copy the tag's config and remove it. That will destroy the reference
266			$tagConfig = $this->tagsConfig[$tagName];
267			unset($this->tagsConfig[$tagName]);
268
269			// Set the new value and replace the tag's config
270			$tagConfig[$optionName]     = $optionValue;
271			$this->tagsConfig[$tagName] = $tagConfig;
272		}
273	}
274
275	//==========================================================================
276	// Public API
277	//==========================================================================
278
279	/**
280	* Disable a tag
281	*
282	* @param  string $tagName Name of the tag
283	* @return void
284	*/
285	public function disableTag($tagName)
286	{
287		$this->setTagOption($tagName, 'isDisabled', true);
288	}
289
290	/**
291	* Enable a tag
292	*
293	* @param  string $tagName Name of the tag
294	* @return void
295	*/
296	public function enableTag($tagName)
297	{
298		if (isset($this->tagsConfig[$tagName]))
299		{
300			unset($this->tagsConfig[$tagName]['isDisabled']);
301		}
302	}
303
304	/**
305	* Get this parser's Logger instance
306	*
307	* @return Logger
308	*/
309	public function getLogger()
310	{
311		return $this->logger;
312	}
313
314	/**
315	* Return the last text parsed
316	*
317	* This method returns the normalized text, which may be slightly different from the original
318	* text in that EOLs are normalized to LF and other control codes are stripped. This method is
319	* meant to be used in support of processing log entries, which contain offsets based on the
320	* normalized text
321	*
322	* @see Parser::reset()
323	*
324	* @return string
325	*/
326	public function getText()
327	{
328		return $this->text;
329	}
330
331	/**
332	* Parse a text
333	*
334	* @param  string $text Text to parse
335	* @return string       XML representation
336	*/
337	public function parse($text)
338	{
339		// Reset the parser and save the uid
340		$this->reset($text);
341		$uid = $this->uid;
342
343		// Do the heavy lifting
344		$this->executePluginParsers();
345		$this->processTags();
346
347		// Finalize the document
348		$this->finalizeOutput();
349
350		// Check the uid in case a plugin or a filter reset the parser mid-execution
351		if ($this->uid !== $uid)
352		{
353			throw new RuntimeException('The parser has been reset during execution');
354		}
355
356		// Log a warning if the fixing cost limit was exceeded
357		if ($this->currentFixingCost > $this->maxFixingCost)
358		{
359			$this->logger->warn('Fixing cost limit exceeded');
360		}
361
362		return $this->output;
363	}
364
365	/**
366	* Change a tag's tagLimit
367	*
368	* NOTE: the default tagLimit should generally be set during configuration instead
369	*
370	* @param  string  $tagName  The tag's name, in UPPERCASE
371	* @param  integer $tagLimit
372	* @return void
373	*/
374	public function setTagLimit($tagName, $tagLimit)
375	{
376		$this->setTagOption($tagName, 'tagLimit', $tagLimit);
377	}
378
379	/**
380	* Change a tag's nestingLimit
381	*
382	* NOTE: the default nestingLimit should generally be set during configuration instead
383	*
384	* @param  string  $tagName      The tag's name, in UPPERCASE
385	* @param  integer $nestingLimit
386	* @return void
387	*/
388	public function setNestingLimit($tagName, $nestingLimit)
389	{
390		$this->setTagOption($tagName, 'nestingLimit', $nestingLimit);
391	}
392
393	//==========================================================================
394	// Output handling
395	//==========================================================================
396
397	/**
398	* Finalize the output by appending the rest of the unprocessed text and create the root node
399	*
400	* @return void
401	*/
402	protected function finalizeOutput()
403	{
404		// Output the rest of the text and close the last paragraph
405		$this->outputText($this->textLen, 0, true);
406
407		// Remove empty tag pairs, e.g. <I><U></U></I> as well as empty paragraphs
408		do
409		{
410			$this->output = preg_replace('(<([^ />]++)[^>]*></\\1>)', '', $this->output, -1, $cnt);
411		}
412		while ($cnt > 0);
413
414		// Merge consecutive <i> tags
415		if (strpos($this->output, '</i><i>') !== false)
416		{
417			$this->output = str_replace('</i><i>', '', $this->output);
418		}
419
420		// Remove control characters from the output to ensure it's valid XML
421		$this->output = preg_replace('([\\x00-\\x08\\x0B-\\x1F])', '', $this->output);
422
423		// Encode Unicode characters that are outside of the BMP
424		$this->output = Utils::encodeUnicodeSupplementaryCharacters($this->output);
425
426		// Use a <r> root if the text is rich, or <t> for plain text (including <p></p> and <br/>)
427		$tagName = ($this->isRich) ? 'r' : 't';
428
429		// Prepare the root node with all the namespace declarations
430		$tmp = '<' . $tagName;
431		foreach (array_keys($this->namespaces) as $prefix)
432		{
433			$tmp .= ' xmlns:' . $prefix . '="urn:s9e:TextFormatter:' . $prefix . '"';
434		}
435
436		$this->output = $tmp . '>' . $this->output . '</' . $tagName . '>';
437	}
438
439	/**
440	* Append a tag to the output
441	*
442	* @param  Tag  $tag Tag to append
443	* @return void
444	*/
445	protected function outputTag(Tag $tag)
446	{
447		$this->isRich = true;
448
449		$tagName  = $tag->getName();
450		$tagPos   = $tag->getPos();
451		$tagLen   = $tag->getLen();
452		$tagFlags = $tag->getFlags();
453
454		if ($tagFlags & self::RULE_IGNORE_WHITESPACE)
455		{
456			$skipBefore = 1;
457			$skipAfter  = ($tag->isEndTag()) ? 2 : 1;
458		}
459		else
460		{
461			$skipBefore = $skipAfter = 0;
462		}
463
464		// Current paragraph must end before the tag if:
465		//  - the tag is a start (or self-closing) tag and it breaks paragraphs, or
466		//  - the tag is an end tag (but not self-closing)
467		$closeParagraph = (!$tag->isStartTag() || ($tagFlags & self::RULE_BREAK_PARAGRAPH));
468
469		// Let the cursor catch up with this tag's position
470		$this->outputText($tagPos, $skipBefore, $closeParagraph);
471
472		// Capture the text consumed by the tag
473		$tagText = ($tagLen)
474		         ? htmlspecialchars(substr($this->text, $tagPos, $tagLen), ENT_NOQUOTES, 'UTF-8')
475		         : '';
476
477		// Output current tag
478		if ($tag->isStartTag())
479		{
480			// Handle paragraphs before opening the tag
481			if (!($tagFlags & self::RULE_BREAK_PARAGRAPH))
482			{
483				$this->outputParagraphStart($tagPos);
484			}
485
486			// Record this tag's namespace, if applicable
487			$colonPos = strpos($tagName, ':');
488			if ($colonPos)
489			{
490				$this->namespaces[substr($tagName, 0, $colonPos)] = 0;
491			}
492
493			// Open the start tag and add its attributes, but don't close the tag
494			$this->output .= '<' . $tagName;
495
496			// We output the attributes in lexical order. Helps canonicalizing the output and could
497			// prove useful someday
498			$attributes = $tag->getAttributes();
499			ksort($attributes);
500
501			foreach ($attributes as $attrName => $attrValue)
502			{
503				$this->output .= ' ' . $attrName . '="' . str_replace("\n", '&#10;', htmlspecialchars($attrValue, ENT_COMPAT, 'UTF-8')) . '"';
504			}
505
506			if ($tag->isSelfClosingTag())
507			{
508				if ($tagLen)
509				{
510					$this->output .= '>' . $tagText . '</' . $tagName . '>';
511				}
512				else
513				{
514					$this->output .= '/>';
515				}
516			}
517			elseif ($tagLen)
518			{
519				$this->output .= '><s>' . $tagText . '</s>';
520			}
521			else
522			{
523				$this->output .= '>';
524			}
525		}
526		else
527		{
528			if ($tagLen)
529			{
530				$this->output .= '<e>' . $tagText . '</e>';
531			}
532
533			$this->output .= '</' . $tagName . '>';
534		}
535
536		// Move the cursor past the tag
537		$this->pos = $tagPos + $tagLen;
538
539		// Skip newlines (no other whitespace) after this tag
540		$this->wsPos = $this->pos;
541		while ($skipAfter && $this->wsPos < $this->textLen && $this->text[$this->wsPos] === "\n")
542		{
543			// Decrement the number of lines to skip
544			--$skipAfter;
545
546			// Move the cursor past the newline
547			++$this->wsPos;
548		}
549	}
550
551	/**
552	* Output the text between the cursor's position (included) and given position (not included)
553	*
554	* @param  integer $catchupPos     Position we're catching up to
555	* @param  integer $maxLines       Maximum number of lines to ignore at the end of the text
556	* @param  bool    $closeParagraph Whether to close the paragraph at the end, if applicable
557	* @return void
558	*/
559	protected function outputText($catchupPos, $maxLines, $closeParagraph)
560	{
561		if ($closeParagraph)
562		{
563			if (!($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
564			{
565				$closeParagraph = false;
566			}
567			else
568			{
569				// Ignore any number of lines at the end if we're closing a paragraph
570				$maxLines = -1;
571			}
572		}
573
574		if ($this->pos >= $catchupPos)
575		{
576			// We're already there, close the paragraph if applicable and return
577			if ($closeParagraph)
578			{
579				$this->outputParagraphEnd();
580			}
581
582			return;
583		}
584
585		// Skip over previously identified whitespace if applicable
586		if ($this->wsPos > $this->pos)
587		{
588			$skipPos       = min($catchupPos, $this->wsPos);
589			$this->output .= substr($this->text, $this->pos, $skipPos - $this->pos);
590			$this->pos     = $skipPos;
591
592			if ($this->pos >= $catchupPos)
593			{
594				// Skipped everything. Close the paragraph if applicable and return
595				if ($closeParagraph)
596				{
597					$this->outputParagraphEnd();
598				}
599
600				return;
601			}
602		}
603
604		// Test whether we're even supposed to output anything
605		if ($this->context['flags'] & self::RULE_IGNORE_TEXT)
606		{
607			$catchupLen  = $catchupPos - $this->pos;
608			$catchupText = substr($this->text, $this->pos, $catchupLen);
609
610			// If the catchup text is not entirely composed of whitespace, we put it inside ignore
611			// tags
612			if (strspn($catchupText, " \n\t") < $catchupLen)
613			{
614				$catchupText = '<i>' . htmlspecialchars($catchupText, ENT_NOQUOTES, 'UTF-8') . '</i>';
615			}
616
617			$this->output .= $catchupText;
618			$this->pos = $catchupPos;
619
620			if ($closeParagraph)
621			{
622				$this->outputParagraphEnd();
623			}
624
625			return;
626		}
627
628		// Compute the amount of text to ignore at the end of the output
629		$ignorePos = $catchupPos;
630		$ignoreLen = 0;
631
632		// Ignore as many lines (including whitespace) as specified
633		while ($maxLines && --$ignorePos >= $this->pos)
634		{
635			$c = $this->text[$ignorePos];
636			if (strpos(self::WHITESPACE, $c) === false)
637			{
638				break;
639			}
640
641			if ($c === "\n")
642			{
643				--$maxLines;
644			}
645
646			++$ignoreLen;
647		}
648
649		// Adjust $catchupPos to ignore the text at the end
650		$catchupPos -= $ignoreLen;
651
652		// Break down the text in paragraphs if applicable
653		if ($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS)
654		{
655			if (!$this->context['inParagraph'])
656			{
657				$this->outputWhitespace($catchupPos);
658
659				if ($catchupPos > $this->pos)
660				{
661					$this->outputParagraphStart($catchupPos);
662				}
663			}
664
665			// Look for a paragraph break in this text
666			$pbPos = strpos($this->text, "\n\n", $this->pos);
667
668			while ($pbPos !== false && $pbPos < $catchupPos)
669			{
670				$this->outputText($pbPos, 0, true);
671				$this->outputParagraphStart($catchupPos);
672
673				$pbPos = strpos($this->text, "\n\n", $this->pos);
674			}
675		}
676
677		// Capture, escape and output the text
678		if ($catchupPos > $this->pos)
679		{
680			$catchupText = htmlspecialchars(
681				substr($this->text, $this->pos, $catchupPos - $this->pos),
682				ENT_NOQUOTES,
683				'UTF-8'
684			);
685
686			// Format line breaks if applicable
687			if (($this->context['flags'] & self::RULES_AUTO_LINEBREAKS) === self::RULE_ENABLE_AUTO_BR)
688			{
689				$catchupText = str_replace("\n", "<br/>\n", $catchupText);
690			}
691
692			$this->output .= $catchupText;
693		}
694
695		// Close the paragraph if applicable
696		if ($closeParagraph)
697		{
698			$this->outputParagraphEnd();
699		}
700
701		// Add the ignored text if applicable
702		if ($ignoreLen)
703		{
704			$this->output .= substr($this->text, $catchupPos, $ignoreLen);
705		}
706
707		// Move the cursor past the text
708		$this->pos = $catchupPos + $ignoreLen;
709	}
710
711	/**
712	* Output a linebreak tag
713	*
714	* @param  Tag  $tag
715	* @return void
716	*/
717	protected function outputBrTag(Tag $tag)
718	{
719		$this->outputText($tag->getPos(), 0, false);
720		$this->output .= '<br/>';
721	}
722
723	/**
724	* Output an ignore tag
725	*
726	* @param  Tag  $tag
727	* @return void
728	*/
729	protected function outputIgnoreTag(Tag $tag)
730	{
731		$tagPos = $tag->getPos();
732		$tagLen = $tag->getLen();
733
734		// Capture the text to ignore
735		$ignoreText = substr($this->text, $tagPos, $tagLen);
736
737		// Catch up with the tag's position then output the tag
738		$this->outputText($tagPos, 0, false);
739		$this->output .= '<i>' . htmlspecialchars($ignoreText, ENT_NOQUOTES, 'UTF-8') . '</i>';
740		$this->isRich = true;
741
742		// Move the cursor past this tag
743		$this->pos = $tagPos + $tagLen;
744	}
745
746	/**
747	* Start a paragraph between current position and given position, if applicable
748	*
749	* @param  integer $maxPos Rightmost position at which the paragraph can be opened
750	* @return void
751	*/
752	protected function outputParagraphStart($maxPos)
753	{
754		// Do nothing if we're already in a paragraph, or if we don't use paragraphs
755		if ($this->context['inParagraph']
756		 || !($this->context['flags'] & self::RULE_CREATE_PARAGRAPHS))
757		{
758			return;
759		}
760
761		// Output the whitespace between $this->pos and $maxPos if applicable
762		$this->outputWhitespace($maxPos);
763
764		// Open the paragraph, but only if it's not at the very end of the text
765		if ($this->pos < $this->textLen)
766		{
767			$this->output .= '<p>';
768			$this->context['inParagraph'] = true;
769		}
770	}
771
772	/**
773	* Close current paragraph at current position if applicable
774	*
775	* @return void
776	*/
777	protected function outputParagraphEnd()
778	{
779		// Do nothing if we're not in a paragraph
780		if (!$this->context['inParagraph'])
781		{
782			return;
783		}
784
785		$this->output .= '</p>';
786		$this->context['inParagraph'] = false;
787	}
788
789	/**
790	* Output the content of a verbatim tag
791	*
792	* @param  Tag  $tag
793	* @return void
794	*/
795	protected function outputVerbatim(Tag $tag)
796	{
797		$flags = $this->context['flags'];
798		$this->context['flags'] = $tag->getFlags();
799		$this->outputText($this->currentTag->getPos() + $this->currentTag->getLen(), 0, false);
800		$this->context['flags'] = $flags;
801	}
802
803	/**
804	* Skip as much whitespace after current position as possible
805	*
806	* @param  integer $maxPos Rightmost character to be skipped
807	* @return void
808	*/
809	protected function outputWhitespace($maxPos)
810	{
811		if ($maxPos > $this->pos)
812		{
813			$spn = strspn($this->text, self::WHITESPACE, $this->pos, $maxPos - $this->pos);
814
815			if ($spn)
816			{
817				$this->output .= substr($this->text, $this->pos, $spn);
818				$this->pos += $spn;
819			}
820		}
821	}
822
823	//==========================================================================
824	// Plugins handling
825	//==========================================================================
826
827	/**
828	* Disable a plugin
829	*
830	* @param  string $pluginName Name of the plugin
831	* @return void
832	*/
833	public function disablePlugin($pluginName)
834	{
835		if (isset($this->pluginsConfig[$pluginName]))
836		{
837			// Copy the plugin's config to remove the reference
838			$pluginConfig = $this->pluginsConfig[$pluginName];
839			unset($this->pluginsConfig[$pluginName]);
840
841			// Update the value and replace the plugin's config
842			$pluginConfig['isDisabled'] = true;
843			$this->pluginsConfig[$pluginName] = $pluginConfig;
844		}
845	}
846
847	/**
848	* Enable a plugin
849	*
850	* @param  string $pluginName Name of the plugin
851	* @return void
852	*/
853	public function enablePlugin($pluginName)
854	{
855		if (isset($this->pluginsConfig[$pluginName]))
856		{
857			$this->pluginsConfig[$pluginName]['isDisabled'] = false;
858		}
859	}
860
861	/**
862	* Execute given plugin
863	*
864	* @param  string $pluginName Plugin's name
865	* @return void
866	*/
867	protected function executePluginParser($pluginName)
868	{
869		$pluginConfig = $this->pluginsConfig[$pluginName];
870		if (isset($pluginConfig['quickMatch']) && strpos($this->text, $pluginConfig['quickMatch']) === false)
871		{
872			return;
873		}
874
875		$matches = [];
876		if (isset($pluginConfig['regexp'], $pluginConfig['regexpLimit']))
877		{
878			$matches = $this->getMatches($pluginConfig['regexp'], $pluginConfig['regexpLimit']);
879			if (empty($matches))
880			{
881				return;
882			}
883		}
884
885		// Execute the plugin's parser, which will add tags via $this->addStartTag() and others
886		call_user_func($this->getPluginParser($pluginName), $this->text, $matches);
887	}
888
889	/**
890	* Execute all the plugins
891	*
892	* @return void
893	*/
894	protected function executePluginParsers()
895	{
896		foreach ($this->pluginsConfig as $pluginName => $pluginConfig)
897		{
898			if (empty($pluginConfig['isDisabled']))
899			{
900				$this->executePluginParser($pluginName);
901			}
902		}
903	}
904
905	/**
906	* Execute given regexp and returns as many matches as given limit
907	*
908	* @param  string  $regexp
909	* @param  integer $limit
910	* @return array
911	*/
912	protected function getMatches($regexp, $limit)
913	{
914		$cnt = preg_match_all($regexp, $this->text, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
915		if ($cnt > $limit)
916		{
917			$matches = array_slice($matches, 0, $limit);
918		}
919
920		return $matches;
921	}
922
923	/**
924	* Get the cached callback for given plugin's parser
925	*
926	* @param  string $pluginName Plugin's name
927	* @return callable
928	*/
929	protected function getPluginParser($pluginName)
930	{
931		// Cache a new instance of this plugin's parser if there isn't one already
932		if (!isset($this->pluginParsers[$pluginName]))
933		{
934			$pluginConfig = $this->pluginsConfig[$pluginName];
935			$className = (isset($pluginConfig['className']))
936			           ? $pluginConfig['className']
937			           : 's9e\\TextFormatter\\Plugins\\' . $pluginName . '\\Parser';
938
939			// Register the parser as a callback
940			$this->pluginParsers[$pluginName] = [new $className($this, $pluginConfig), 'parse'];
941		}
942
943		return $this->pluginParsers[$pluginName];
944	}
945
946	/**
947	* Register a parser
948	*
949	* Can be used to add a new parser with no plugin config, or pre-generate a parser for an
950	* existing plugin
951	*
952	* @param  string   $pluginName
953	* @param  callback $parser
954	* @param  string   $regexp
955	* @param  integer  $limit
956	* @return void
957	*/
958	public function registerParser($pluginName, $parser, $regexp = null, $limit = PHP_INT_MAX)
959	{
960		if (!is_callable($parser))
961		{
962			throw new InvalidArgumentException('Argument 1 passed to ' . __METHOD__ . ' must be a valid callback');
963		}
964		// Create an empty config for this plugin to ensure it is executed
965		if (!isset($this->pluginsConfig[$pluginName]))
966		{
967			$this->pluginsConfig[$pluginName] = [];
968		}
969		if (isset($regexp))
970		{
971			$this->pluginsConfig[$pluginName]['regexp']      = $regexp;
972			$this->pluginsConfig[$pluginName]['regexpLimit'] = $limit;
973		}
974		$this->pluginParsers[$pluginName] = $parser;
975	}
976
977	//==========================================================================
978	// Rules handling
979	//==========================================================================
980
981	/**
982	* Apply closeAncestor rules associated with given tag
983	*
984	* @param  Tag  $tag Tag
985	* @return bool      Whether a new tag has been added
986	*/
987	protected function closeAncestor(Tag $tag)
988	{
989		if (!empty($this->openTags))
990		{
991			$tagName   = $tag->getName();
992			$tagConfig = $this->tagsConfig[$tagName];
993
994			if (!empty($tagConfig['rules']['closeAncestor']))
995			{
996				$i = count($this->openTags);
997
998				while (--$i >= 0)
999				{
1000					$ancestor     = $this->openTags[$i];
1001					$ancestorName = $ancestor->getName();
1002
1003					if (isset($tagConfig['rules']['closeAncestor'][$ancestorName]))
1004					{
1005						++$this->currentFixingCost;
1006
1007						// We have to close this ancestor. First we reinsert this tag...
1008						$this->tagStack[] = $tag;
1009
1010						// ...then we add a new end tag for it with a better priority
1011						$this->addMagicEndTag($ancestor, $tag->getPos(), $tag->getSortPriority() - 1);
1012
1013						return true;
1014					}
1015				}
1016			}
1017		}
1018
1019		return false;
1020	}
1021
1022	/**
1023	* Apply closeParent rules associated with given tag
1024	*
1025	* @param  Tag  $tag Tag
1026	* @return bool      Whether a new tag has been added
1027	*/
1028	protected function closeParent(Tag $tag)
1029	{
1030		if (!empty($this->openTags))
1031		{
1032			$tagName   = $tag->getName();
1033			$tagConfig = $this->tagsConfig[$tagName];
1034
1035			if (!empty($tagConfig['rules']['closeParent']))
1036			{
1037				$parent     = end($this->openTags);
1038				$parentName = $parent->getName();
1039
1040				if (isset($tagConfig['rules']['closeParent'][$parentName]))
1041				{
1042					++$this->currentFixingCost;
1043
1044					// We have to close that parent. First we reinsert the tag...
1045					$this->tagStack[] = $tag;
1046
1047					// ...then we add a new end tag for it with a better priority
1048					$this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);
1049
1050					return true;
1051				}
1052			}
1053		}
1054
1055		return false;
1056	}
1057
1058	/**
1059	* Apply the createChild rules associated with given tag
1060	*
1061	* @param  Tag  $tag Tag
1062	* @return void
1063	*/
1064	protected function createChild(Tag $tag)
1065	{
1066		$tagConfig = $this->tagsConfig[$tag->getName()];
1067		if (isset($tagConfig['rules']['createChild']))
1068		{
1069			$priority = -1000;
1070			$tagPos   = $this->pos + strspn($this->text, " \n\r\t", $this->pos);
1071			foreach ($tagConfig['rules']['createChild'] as $tagName)
1072			{
1073				$this->addStartTag($tagName, $tagPos, 0, ++$priority);
1074			}
1075		}
1076	}
1077
1078	/**
1079	* Apply fosterParent rules associated with given tag
1080	*
1081	* NOTE: this rule has the potential for creating an unbounded loop, either if a tag tries to
1082	*       foster itself or two or more tags try to foster each other in a loop. We mitigate the
1083	*       risk by preventing a tag from creating a child of itself (the parent still gets closed)
1084	*       and by checking and increasing the currentFixingCost so that a loop of multiple tags
1085	*       do not run indefinitely. The default tagLimit and nestingLimit also serve to prevent the
1086	*       loop from running indefinitely
1087	*
1088	* @param  Tag  $tag Tag
1089	* @return bool      Whether a new tag has been added
1090	*/
1091	protected function fosterParent(Tag $tag)
1092	{
1093		if (!empty($this->openTags))
1094		{
1095			$tagName   = $tag->getName();
1096			$tagConfig = $this->tagsConfig[$tagName];
1097
1098			if (!empty($tagConfig['rules']['fosterParent']))
1099			{
1100				$parent     = end($this->openTags);
1101				$parentName = $parent->getName();
1102
1103				if (isset($tagConfig['rules']['fosterParent'][$parentName]))
1104				{
1105					if ($parentName !== $tagName && $this->currentFixingCost < $this->maxFixingCost)
1106					{
1107						$this->addFosterTag($tag, $parent);
1108					}
1109
1110					// Reinsert current tag
1111					$this->tagStack[] = $tag;
1112
1113					// And finally close its parent with a priority that ensures it is processed
1114					// before this tag
1115					$this->addMagicEndTag($parent, $tag->getPos(), $tag->getSortPriority() - 1);
1116
1117					// Adjust the fixing cost to account for the additional tags/processing
1118					$this->currentFixingCost += 4;
1119
1120					return true;
1121				}
1122			}
1123		}
1124
1125		return false;
1126	}
1127
1128	/**
1129	* Apply requireAncestor rules associated with given tag
1130	*
1131	* @param  Tag  $tag Tag
1132	* @return bool      Whether this tag has an unfulfilled requireAncestor requirement
1133	*/
1134	protected function requireAncestor(Tag $tag)
1135	{
1136		$tagName   = $tag->getName();
1137		$tagConfig = $this->tagsConfig[$tagName];
1138
1139		if (isset($tagConfig['rules']['requireAncestor']))
1140		{
1141			foreach ($tagConfig['rules']['requireAncestor'] as $ancestorName)
1142			{
1143				if (!empty($this->cntOpen[$ancestorName]))
1144				{
1145					return false;
1146				}
1147			}
1148
1149			$this->logger->err('Tag requires an ancestor', [
1150				'requireAncestor' => implode(',', $tagConfig['rules']['requireAncestor']),
1151				'tag'             => $tag
1152			]);
1153
1154			return true;
1155		}
1156
1157		return false;
1158	}
1159
1160	//==========================================================================
1161	// Tag processing
1162	//==========================================================================
1163
1164	/**
1165	* Create and add a copy of a tag as a child of a given tag
1166	*
1167	* @param  Tag  $tag       Current tag
1168	* @param  Tag  $fosterTag Tag to foster
1169	* @return void
1170	*/
1171	protected function addFosterTag(Tag $tag, Tag $fosterTag)
1172	{
1173		list($childPos, $childPrio) = $this->getMagicStartCoords($tag->getPos() + $tag->getLen());
1174
1175		// Add a 0-width copy of the parent tag after this tag and make it depend on this tag
1176		$childTag = $this->addCopyTag($fosterTag, $childPos, 0, $childPrio);
1177		$tag->cascadeInvalidationTo($childTag);
1178	}
1179
1180	/**
1181	* Create and add an end tag for given start tag at given position
1182	*
1183	* @param  Tag     $startTag Start tag
1184	* @param  integer $tagPos   End tag's position (will be adjusted for whitespace if applicable)
1185	* @param  integer $prio     End tag's priority
1186	* @return Tag
1187	*/
1188	protected function addMagicEndTag(Tag $startTag, $tagPos, $prio = 0)
1189	{
1190		$tagName = $startTag->getName();
1191
1192		// Adjust the end tag's position if whitespace is to be minimized
1193		if (($this->currentTag->getFlags() | $startTag->getFlags()) & self::RULE_IGNORE_WHITESPACE)
1194		{
1195			$tagPos = $this->getMagicEndPos($tagPos);
1196		}
1197
1198		// Add a 0-width end tag that is paired with the given start tag
1199		$endTag = $this->addEndTag($tagName, $tagPos, 0, $prio);
1200		$endTag->pairWith($startTag);
1201
1202		return $endTag;
1203	}
1204
1205	/**
1206	* Compute the position of a magic end tag, adjusted for whitespace
1207	*
1208	* @param  integer $tagPos Rightmost possible position for the tag
1209	* @return integer
1210	*/
1211	protected function getMagicEndPos($tagPos)
1212	{
1213		// Back up from given position to the cursor's position until we find a character that
1214		// is not whitespace
1215		while ($tagPos > $this->pos && strpos(self::WHITESPACE, $this->text[$tagPos - 1]) !== false)
1216		{
1217			--$tagPos;
1218		}
1219
1220		return $tagPos;
1221	}
1222
1223	/**
1224	* Compute the position and priority of a magic start tag, adjusted for whitespace
1225	*
1226	* @param  integer   $tagPos Leftmost possible position for the tag
1227	* @return integer[]         [Tag pos, priority]
1228	*/
1229	protected function getMagicStartCoords($tagPos)
1230	{
1231		if (empty($this->tagStack))
1232		{
1233			// Set the next position outside the text boundaries
1234			$nextPos  = $this->textLen + 1;
1235			$nextPrio = 0;
1236		}
1237		else
1238		{
1239			$nextTag  = end($this->tagStack);
1240			$nextPos  = $nextTag->getPos();
1241			$nextPrio = $nextTag->getSortPriority();
1242		}
1243
1244		// Find the first non-whitespace position before next tag or the end of text
1245		while ($tagPos < $nextPos && strpos(self::WHITESPACE, $this->text[$tagPos]) !== false)
1246		{
1247			++$tagPos;
1248		}
1249
1250		// Set a priority that ensures this tag appears before the next tag
1251		$prio = ($tagPos === $nextPos) ? $nextPrio - 1 : 0;
1252
1253		return [$tagPos, $prio];
1254	}
1255
1256	/**
1257	* Test whether given start tag is immediately followed by a closing tag
1258	*
1259	* @param  Tag  $tag Start tag
1260	* @return bool
1261	*/
1262	protected function isFollowedByClosingTag(Tag $tag)
1263	{
1264		return (empty($this->tagStack)) ? false : end($this->tagStack)->canClose($tag);
1265	}
1266
1267	/**
1268	* Process all tags in the stack
1269	*
1270	* @return void
1271	*/
1272	protected function processTags()
1273	{
1274		if (empty($this->tagStack))
1275		{
1276			return;
1277		}
1278
1279		// Initialize the count tables
1280		foreach (array_keys($this->tagsConfig) as $tagName)
1281		{
1282			$this->cntOpen[$tagName]  = 0;
1283			$this->cntTotal[$tagName] = 0;
1284		}
1285
1286		// Process the tag stack, close tags that were left open and repeat until done
1287		do
1288		{
1289			while (!empty($this->tagStack))
1290			{
1291				if (!$this->tagStackIsSorted)
1292				{
1293					$this->sortTags();
1294				}
1295
1296				$this->currentTag = array_pop($this->tagStack);
1297				$this->processCurrentTag();
1298			}
1299
1300			// Close tags that were left open
1301			foreach ($this->openTags as $startTag)
1302			{
1303				// NOTE: we add tags in hierarchical order (ancestors to descendants) but since
1304				//       the stack is processed in LIFO order, it means that tags get closed in
1305				//       the correct order, from descendants to ancestors
1306				$this->addMagicEndTag($startTag, $this->textLen);
1307			}
1308		}
1309		while (!empty($this->tagStack));
1310	}
1311
1312	/**
1313	* Process current tag
1314	*
1315	* @return void
1316	*/
1317	protected function processCurrentTag()
1318	{
1319		// Invalidate current tag if tags are disabled and current tag would not close the last open
1320		// tag and is not a system tag
1321		if (($this->context['flags'] & self::RULE_IGNORE_TAGS)
1322		 && !$this->currentTag->canClose(end($this->openTags))
1323		 && !$this->currentTag->isSystemTag())
1324		{
1325			$this->currentTag->invalidate();
1326		}
1327
1328		$tagPos = $this->currentTag->getPos();
1329		$tagLen = $this->currentTag->getLen();
1330
1331		// Test whether the cursor passed this tag's position already
1332		if ($this->pos > $tagPos && !$this->currentTag->isInvalid())
1333		{
1334			// Test whether this tag is paired with a start tag and this tag is still open
1335			$startTag = $this->currentTag->getStartTag();
1336
1337			if ($startTag && in_array($startTag, $this->openTags, true))
1338			{
1339				// Create an end tag that matches current tag's start tag, which consumes as much of
1340				// the same text as current tag and is paired with the same start tag
1341				$this->addEndTag(
1342					$startTag->getName(),
1343					$this->pos,
1344					max(0, $tagPos + $tagLen - $this->pos)
1345				)->pairWith($startTag);
1346
1347				// Note that current tag is not invalidated, it's merely replaced
1348				return;
1349			}
1350
1351			// If this is an ignore tag, try to ignore as much as the remaining text as possible
1352			if ($this->currentTag->isIgnoreTag())
1353			{
1354				$ignoreLen = $tagPos + $tagLen - $this->pos;
1355
1356				if ($ignoreLen > 0)
1357				{
1358					// Create a new ignore tag and move on
1359					$this->addIgnoreTag($this->pos, $ignoreLen);
1360
1361					return;
1362				}
1363			}
1364
1365			// Skipped tags are invalidated
1366			$this->currentTag->invalidate();
1367		}
1368
1369		if ($this->currentTag->isInvalid())
1370		{
1371			return;
1372		}
1373
1374		if ($this->currentTag->isIgnoreTag())
1375		{
1376			$this->outputIgnoreTag($this->currentTag);
1377		}
1378		elseif ($this->currentTag->isBrTag())
1379		{
1380			// Output the tag if it's allowed, ignore it otherwise
1381			if (!($this->context['flags'] & self::RULE_PREVENT_BR))
1382			{
1383				$this->outputBrTag($this->currentTag);
1384			}
1385		}
1386		elseif ($this->currentTag->isParagraphBreak())
1387		{
1388			$this->outputText($this->currentTag->getPos(), 0, true);
1389		}
1390		elseif ($this->currentTag->isVerbatim())
1391		{
1392			$this->outputVerbatim($this->currentTag);
1393		}
1394		elseif ($this->currentTag->isStartTag())
1395		{
1396			$this->processStartTag($this->currentTag);
1397		}
1398		else
1399		{
1400			$this->processEndTag($this->currentTag);
1401		}
1402	}
1403
1404	/**
1405	* Process given start tag (including self-closing tags) at current position
1406	*
1407	* @param  Tag  $tag Start tag (including self-closing)
1408	* @return void
1409	*/
1410	protected function processStartTag(Tag $tag)
1411	{
1412		$tagName   = $tag->getName();
1413		$tagConfig = $this->tagsConfig[$tagName];
1414
1415		// 1. Check that this tag has not reached its global limit tagLimit
1416		// 2. Execute this tag's filterChain, which will filter/validate its attributes
1417		// 3. Apply closeParent, closeAncestor and fosterParent rules
1418		// 4. Check for nestingLimit
1419		// 5. Apply requireAncestor rules
1420		//
1421		// This order ensures that the tag is valid and within the set limits before we attempt to
1422		// close parents or ancestors. We need to close ancestors before we can check for nesting
1423		// limits, whether this tag is allowed within current context (the context may change
1424		// as ancestors are closed) or whether the required ancestors are still there (they might
1425		// have been closed by a rule.)
1426		if ($this->cntTotal[$tagName] >= $tagConfig['tagLimit'])
1427		{
1428			$this->logger->err(
1429				'Tag limit exceeded',
1430				[
1431					'tag'      => $tag,
1432					'tagName'  => $tagName,
1433					'tagLimit' => $tagConfig['tagLimit']
1434				]
1435			);
1436			$tag->invalidate();
1437
1438			return;
1439		}
1440
1441		FilterProcessing::filterTag($tag, $this, $this->tagsConfig, $this->openTags);
1442		if ($tag->isInvalid())
1443		{
1444			return;
1445		}
1446
1447		if ($this->currentFixingCost < $this->maxFixingCost)
1448		{
1449			if ($this->fosterParent($tag) || $this->closeParent($tag) || $this->closeAncestor($tag))
1450			{
1451				// This tag parent/ancestor needs to be closed, we just return (the tag is still valid)
1452				return;
1453			}
1454		}
1455
1456		if ($this->cntOpen[$tagName] >= $tagConfig['nestingLimit'])
1457		{
1458			$this->logger->err(
1459				'Nesting limit exceeded',
1460				[
1461					'tag'          => $tag,
1462					'tagName'      => $tagName,
1463					'nestingLimit' => $tagConfig['nestingLimit']
1464				]
1465			);
1466			$tag->invalidate();
1467
1468			return;
1469		}
1470
1471		if (!$this->tagIsAllowed($tagName))
1472		{
1473			$msg     = 'Tag is not allowed in this context';
1474			$context = ['tag' => $tag, 'tagName' => $tagName];
1475			if ($tag->getLen() > 0)
1476			{
1477				$this->logger->warn($msg, $context);
1478			}
1479			else
1480			{
1481				$this->logger->debug($msg, $context);
1482			}
1483			$tag->invalidate();
1484
1485			return;
1486		}
1487
1488		if ($this->requireAncestor($tag))
1489		{
1490			$tag->invalidate();
1491
1492			return;
1493		}
1494
1495		// If this tag has an autoClose rule and it's not self-closed, paired with an end tag, or
1496		// immediately followed by an end tag, we replace it with a self-closing tag with the same
1497		// properties
1498		if ($tag->getFlags() & self::RULE_AUTO_CLOSE
1499		 && !$tag->isSelfClosingTag()
1500		 && !$tag->getEndTag()
1501		 && !$this->isFollowedByClosingTag($tag))
1502		{
1503			$newTag = new Tag(Tag::SELF_CLOSING_TAG, $tagName, $tag->getPos(), $tag->getLen());
1504			$newTag->setAttributes($tag->getAttributes());
1505			$newTag->setFlags($tag->getFlags());
1506
1507			$tag = $newTag;
1508		}
1509
1510		if ($tag->getFlags() & self::RULE_TRIM_FIRST_LINE
1511		 && substr($this->text, $tag->getPos() + $tag->getLen(), 1) === "\n")
1512		{
1513			$this->addIgnoreTag($tag->getPos() + $tag->getLen(), 1);
1514		}
1515
1516		// This tag is valid, output it and update the context
1517		$this->outputTag($tag);
1518		$this->pushContext($tag);
1519
1520		// Apply the createChild rules if applicable
1521		$this->createChild($tag);
1522	}
1523
1524	/**
1525	* Process given end tag at current position
1526	*
1527	* @param  Tag  $tag end tag
1528	* @return void
1529	*/
1530	protected function processEndTag(Tag $tag)
1531	{
1532		$tagName = $tag->getName();
1533
1534		if (empty($this->cntOpen[$tagName]))
1535		{
1536			// This is an end tag with no start tag
1537			return;
1538		}
1539
1540		/**
1541		* @var array List of tags need to be closed before given tag
1542		*/
1543		$closeTags = [];
1544
1545		// Iterate through all open tags from last to first to find a match for our tag
1546		$i = count($this->openTags);
1547		while (--$i >= 0)
1548		{
1549			$openTag = $this->openTags[$i];
1550
1551			if ($tag->canClose($openTag))
1552			{
1553				break;
1554			}
1555
1556			$closeTags[] = $openTag;
1557			++$this->currentFixingCost;
1558		}
1559
1560		if ($i < 0)
1561		{
1562			// Did not find a matching tag
1563			$this->logger->debug('Skipping end tag with no start tag', ['tag' => $tag]);
1564
1565			return;
1566		}
1567
1568		// Accumulate flags to determine whether whitespace should be trimmed
1569		$flags = $tag->getFlags();
1570		foreach ($closeTags as $openTag)
1571		{
1572			$flags |= $openTag->getFlags();
1573		}
1574		$ignoreWhitespace = (bool) ($flags & self::RULE_IGNORE_WHITESPACE);
1575
1576		// Only reopen tags if we haven't exceeded our "fixing" budget
1577		$keepReopening = (bool) ($this->currentFixingCost < $this->maxFixingCost);
1578
1579		// Iterate over tags that are being closed, output their end tag and collect tags to be
1580		// reopened
1581		$reopenTags = [];
1582		foreach ($closeTags as $openTag)
1583		{
1584			$openTagName = $openTag->getName();
1585
1586			// Test whether this tag should be reopened automatically
1587			if ($keepReopening)
1588			{
1589				if ($openTag->getFlags() & self::RULE_AUTO_REOPEN)
1590				{
1591					$reopenTags[] = $openTag;
1592				}
1593				else
1594				{
1595					$keepReopening = false;
1596				}
1597			}
1598
1599			// Find the earliest position we can close this open tag
1600			$tagPos = $tag->getPos();
1601			if ($ignoreWhitespace)
1602			{
1603				$tagPos = $this->getMagicEndPos($tagPos);
1604			}
1605
1606			// Output an end tag to close this start tag, then update the context
1607			$endTag = new Tag(Tag::END_TAG, $openTagName, $tagPos, 0);
1608			$endTag->setFlags($openTag->getFlags());
1609			$this->outputTag($endTag);
1610			$this->popContext();
1611		}
1612
1613		// Output our tag, moving the cursor past it, then update the context
1614		$this->outputTag($tag);
1615		$this->popContext();
1616
1617		// If our fixing budget allows it, peek at upcoming tags and remove end tags that would
1618		// close tags that are already being closed now. Also, filter our list of tags being
1619		// reopened by removing those that would immediately be closed
1620		if (!empty($closeTags) && $this->currentFixingCost < $this->maxFixingCost)
1621		{
1622			/**
1623			* @var integer Rightmost position of the portion of text to ignore
1624			*/
1625			$ignorePos = $this->pos;
1626
1627			$i = count($this->tagStack);
1628			while (--$i >= 0 && ++$this->currentFixingCost < $this->maxFixingCost)
1629			{
1630				$upcomingTag = $this->tagStack[$i];
1631
1632				// Test whether the upcoming tag is positioned at current "ignore" position and it's
1633				// strictly an end tag (not a start tag or a self-closing tag)
1634				if ($upcomingTag->getPos() > $ignorePos
1635				 || $upcomingTag->isStartTag())
1636				{
1637					break;
1638				}
1639
1640				// Test whether this tag would close any of the tags we're about to reopen
1641				$j = count($closeTags);
1642
1643				while (--$j >= 0 && ++$this->currentFixingCost < $this->maxFixingCost)
1644				{
1645					if ($upcomingTag->canClose($closeTags[$j]))
1646					{
1647						// Remove the tag from the lists and reset the keys
1648						array_splice($closeTags, $j, 1);
1649
1650						if (isset($reopenTags[$j]))
1651						{
1652							array_splice($reopenTags, $j, 1);
1653						}
1654
1655						// Extend the ignored text to cover this tag
1656						$ignorePos = max(
1657							$ignorePos,
1658							$upcomingTag->getPos() + $upcomingTag->getLen()
1659						);
1660
1661						break;
1662					}
1663				}
1664			}
1665
1666			if ($ignorePos > $this->pos)
1667			{
1668				/**
1669				* @todo have a method that takes (pos,len) rather than a Tag
1670				*/
1671				$this->outputIgnoreTag(new Tag(Tag::SELF_CLOSING_TAG, 'i', $this->pos, $ignorePos - $this->pos));
1672			}
1673		}
1674
1675		// Re-add tags that need to be reopened, at current cursor position
1676		foreach ($reopenTags as $startTag)
1677		{
1678			$newTag = $this->addCopyTag($startTag, $this->pos, 0);
1679
1680			// Re-pair the new tag
1681			$endTag = $startTag->getEndTag();
1682			if ($endTag)
1683			{
1684				$newTag->pairWith($endTag);
1685			}
1686		}
1687	}
1688
1689	/**
1690	* Update counters and replace current context with its parent context
1691	*
1692	* @return void
1693	*/
1694	protected function popContext()
1695	{
1696		$tag = array_pop($this->openTags);
1697		--$this->cntOpen[$tag->getName()];
1698		$this->context = $this->context['parentContext'];
1699	}
1700
1701	/**
1702	* Update counters and replace current context with a new context based on given tag
1703	*
1704	* If given tag is a self-closing tag, the context won't change
1705	*
1706	* @param  Tag  $tag Start tag (including self-closing)
1707	* @return void
1708	*/
1709	protected function pushContext(Tag $tag)
1710	{
1711		$tagName   = $tag->getName();
1712		$tagFlags  = $tag->getFlags();
1713		$tagConfig = $this->tagsConfig[$tagName];
1714
1715		++$this->cntTotal[$tagName];
1716
1717		// If this is a self-closing tag, the context remains the same
1718		if ($tag->isSelfClosingTag())
1719		{
1720			return;
1721		}
1722
1723		// Recompute the allowed tags
1724		$allowed = [];
1725		foreach ($this->context['allowed'] as $k => $v)
1726		{
1727			// If the current tag is not transparent, override the low bits (allowed children) of
1728			// current context with its high bits (allowed descendants)
1729			if (!($tagFlags & self::RULE_IS_TRANSPARENT))
1730			{
1731				$v = ($v & 0xFF00) | ($v >> 8);
1732			}
1733			$allowed[] = $tagConfig['allowed'][$k] & $v;
1734		}
1735
1736		// Use this tag's flags as a base for this context and add inherited rules
1737		$flags = $tagFlags | ($this->context['flags'] & self::RULES_INHERITANCE);
1738
1739		// RULE_DISABLE_AUTO_BR turns off RULE_ENABLE_AUTO_BR
1740		if ($flags & self::RULE_DISABLE_AUTO_BR)
1741		{
1742			$flags &= ~self::RULE_ENABLE_AUTO_BR;
1743		}
1744
1745		++$this->cntOpen[$tagName];
1746		$this->openTags[] = $tag;
1747		$this->context = [
1748			'allowed'       => $allowed,
1749			'flags'         => $flags,
1750			'inParagraph'   => false,
1751			'parentContext' => $this->context
1752		];
1753	}
1754
1755	/**
1756	* Return whether given tag is allowed in current context
1757	*
1758	* @param  string $tagName
1759	* @return bool
1760	*/
1761	protected function tagIsAllowed($tagName)
1762	{
1763		$n = $this->tagsConfig[$tagName]['bitNumber'];
1764
1765		return (bool) ($this->context['allowed'][$n >> 3] & (1 << ($n & 7)));
1766	}
1767
1768	//==========================================================================
1769	// Tag stack
1770	//==========================================================================
1771
1772	/**
1773	* Add a start tag
1774	*
1775	* @param  string  $name Name of the tag
1776	* @param  integer $pos  Position of the tag in the text
1777	* @param  integer $len  Length of text consumed by the tag
1778	* @param  integer $prio Tag's priority
1779	* @return Tag
1780	*/
1781	public function addStartTag($name, $pos, $len, $prio = 0)
1782	{
1783		return $this->addTag(Tag::START_TAG, $name, $pos, $len, $prio);
1784	}
1785
1786	/**
1787	* Add an end tag
1788	*
1789	* @param  string  $name Name of the tag
1790	* @param  integer $pos  Position of the tag in the text
1791	* @param  integer $len  Length of text consumed by the tag
1792	* @param  integer $prio Tag's priority
1793	* @return Tag
1794	*/
1795	public function addEndTag($name, $pos, $len, $prio = 0)
1796	{
1797		return $this->addTag(Tag::END_TAG, $name, $pos, $len, $prio);
1798	}
1799
1800	/**
1801	* Add a self-closing tag
1802	*
1803	* @param  string  $name Name of the tag
1804	* @param  integer $pos  Position of the tag in the text
1805	* @param  integer $len  Length of text consumed by the tag
1806	* @param  integer $prio Tag's priority
1807	* @return Tag
1808	*/
1809	public function addSelfClosingTag($name, $pos, $len, $prio = 0)
1810	{
1811		return $this->addTag(Tag::SELF_CLOSING_TAG, $name, $pos, $len, $prio);
1812	}
1813
1814	/**
1815	* Add a 0-width "br" tag to force a line break at given position
1816	*
1817	* @param  integer $pos  Position of the tag in the text
1818	* @param  integer $prio Tag's priority
1819	* @return Tag
1820	*/
1821	public function addBrTag($pos, $prio = 0)
1822	{
1823		return $this->addTag(Tag::SELF_CLOSING_TAG, 'br', $pos, 0, $prio);
1824	}
1825
1826	/**
1827	* Add an "ignore" tag
1828	*
1829	* @param  integer $pos  Position of the tag in the text
1830	* @param  integer $len  Length of text consumed by the tag
1831	* @param  integer $prio Tag's priority
1832	* @return Tag
1833	*/
1834	public function addIgnoreTag($pos, $len, $prio = 0)
1835	{
1836		return $this->addTag(Tag::SELF_CLOSING_TAG, 'i', $pos, min($len, $this->textLen - $pos), $prio);
1837	}
1838
1839	/**
1840	* Add a paragraph break at given position
1841	*
1842	* Uses a zero-width tag that is actually never output in the result
1843	*
1844	* @param  integer $pos  Position of the tag in the text
1845	* @param  integer $prio Tag's priority
1846	* @return Tag
1847	*/
1848	public function addParagraphBreak($pos, $prio = 0)
1849	{
1850		return $this->addTag(Tag::SELF_CLOSING_TAG, 'pb', $pos, 0, $prio);
1851	}
1852
1853	/**
1854	* Add a copy of given tag at given position and length
1855	*
1856	* @param  Tag     $tag  Original tag
1857	* @param  integer $pos  Copy's position
1858	* @param  integer $len  Copy's length
1859	* @param  integer $prio Copy's priority (same as original by default)
1860	* @return Tag           Copy tag
1861	*/
1862	public function addCopyTag(Tag $tag, $pos, $len, $prio = null)
1863	{
1864		if (!isset($prio))
1865		{
1866			$prio = $tag->getSortPriority();
1867		}
1868		$copy = $this->addTag($tag->getType(), $tag->getName(), $pos, $len, $prio);
1869		$copy->setAttributes($tag->getAttributes());
1870
1871		return $copy;
1872	}
1873
1874	/**
1875	* Add a tag
1876	*
1877	* @param  integer $type Tag's type
1878	* @param  string  $name Name of the tag
1879	* @param  integer $pos  Position of the tag in the text
1880	* @param  integer $len  Length of text consumed by the tag
1881	* @param  integer $prio Tag's priority
1882	* @return Tag
1883	*/
1884	protected function addTag($type, $name, $pos, $len, $prio)
1885	{
1886		// Create the tag
1887		$tag = new Tag($type, $name, $pos, $len, $prio);
1888
1889		// Set this tag's rules bitfield
1890		if (isset($this->tagsConfig[$name]))
1891		{
1892			$tag->setFlags($this->tagsConfig[$name]['rules']['flags']);
1893		}
1894
1895		// Invalidate this tag if it's an unknown tag, a disabled tag, if either of its length or
1896		// position is negative or if it's out of bounds
1897		if ((!isset($this->tagsConfig[$name]) && !$tag->isSystemTag())
1898		 || $this->isInvalidTextSpan($pos, $len))
1899		{
1900			$tag->invalidate();
1901		}
1902		elseif (!empty($this->tagsConfig[$name]['isDisabled']))
1903		{
1904			$this->logger->warn(
1905				'Tag is disabled',
1906				[
1907					'tag'     => $tag,
1908					'tagName' => $name
1909				]
1910			);
1911			$tag->invalidate();
1912		}
1913		else
1914		{
1915			$this->insertTag($tag);
1916		}
1917
1918		return $tag;
1919	}
1920
1921	/**
1922	* Test whether given text span is outside text boundaries or an invalid UTF sequence
1923	*
1924	* @param  integer $pos Start of text
1925	* @param  integer $len Length of text
1926	* @return bool
1927	*/
1928	protected function isInvalidTextSpan($pos, $len)
1929	{
1930		return ($len < 0 || $pos < 0 || $pos + $len > $this->textLen || preg_match('([\\x80-\\xBF])', substr($this->text, $pos, 1) . substr($this->text, $pos + $len, 1)));
1931	}
1932
1933	/**
1934	* Insert given tag in the tag stack
1935	*
1936	* @param  Tag  $tag
1937	* @return void
1938	*/
1939	protected function insertTag(Tag $tag)
1940	{
1941		if (!$this->tagStackIsSorted)
1942		{
1943			$this->tagStack[] = $tag;
1944		}
1945		else
1946		{
1947			// Scan the stack and copy every tag to the next slot until we find the correct index
1948			$i   = count($this->tagStack);
1949			$key = $this->getSortKey($tag);
1950			while ($i > 0 && $key > $this->getSortKey($this->tagStack[$i - 1]))
1951			{
1952				$this->tagStack[$i] = $this->tagStack[$i - 1];
1953				--$i;
1954			}
1955			$this->tagStack[$i] = $tag;
1956		}
1957	}
1958
1959	/**
1960	* Add a pair of tags
1961	*
1962	* @param  string  $name     Name of the tags
1963	* @param  integer $startPos Position of the start tag
1964	* @param  integer $startLen Length of the start tag
1965	* @param  integer $endPos   Position of the start tag
1966	* @param  integer $endLen   Length of the start tag
1967	* @param  integer $prio     Start tag's priority (the end tag will be set to minus that value)
1968	* @return Tag               Start tag
1969	*/
1970	public function addTagPair($name, $startPos, $startLen, $endPos, $endLen, $prio = 0)
1971	{
1972		// NOTE: the end tag is added first to try to keep the stack in the correct order
1973		$endTag   = $this->addEndTag($name, $endPos, $endLen, -$prio);
1974		$startTag = $this->addStartTag($name, $startPos, $startLen, $prio);
1975		$startTag->pairWith($endTag);
1976
1977		return $startTag;
1978	}
1979
1980	/**
1981	* Add a tag that represents a verbatim copy of the original text
1982	*
1983	* @param  integer $pos  Position of the tag in the text
1984	* @param  integer $len  Length of text consumed by the tag
1985	* @param  integer $prio Tag's priority
1986	* @return Tag
1987	*/
1988	public function addVerbatim($pos, $len, $prio = 0)
1989	{
1990		return $this->addTag(Tag::SELF_CLOSING_TAG, 'v', $pos, $len, $prio);
1991	}
1992
1993	/**
1994	* Sort tags by position and precedence
1995	*
1996	* @return void
1997	*/
1998	protected function sortTags()
1999	{
2000		$arr = [];
2001		foreach ($this->tagStack as $i => $tag)
2002		{
2003			$key       = $this->getSortKey($tag, $i);
2004			$arr[$key] = $tag;
2005		}
2006		krsort($arr);
2007
2008		$this->tagStack         = array_values($arr);
2009		$this->tagStackIsSorted = true;
2010	}
2011
2012	/**
2013	* Generate a key for given tag that can be used to compare its position using lexical comparisons
2014	*
2015	* Tags are sorted by position first, then by priority, then by whether they consume any text,
2016	* then by length, and finally in order of their creation.
2017	*
2018	* The stack's array is in reverse order. Therefore, tags that appear at the start of the text
2019	* are at the end of the array.
2020	*
2021	* @param  Tag     $tag
2022	* @param  integer $tagIndex
2023	* @return string
2024	*/
2025	protected function getSortKey(Tag $tag, int $tagIndex = 0): string
2026	{
2027		// Ensure that negative values are sorted correctly by flagging them and making them positive
2028		$prioFlag = ($tag->getSortPriority() >= 0);
2029		$prio     = $tag->getSortPriority();
2030		if (!$prioFlag)
2031		{
2032			$prio += (1 << 30);
2033		}
2034
2035		// Sort 0-width tags separately from the rest
2036		$lenFlag = ($tag->getLen() > 0);
2037		if ($lenFlag)
2038		{
2039			// Inverse their length so that longest matches are processed first
2040			$lenOrder = $this->textLen - $tag->getLen();
2041		}
2042		else
2043		{
2044			// Sort self-closing tags in-between start tags and end tags to keep them outside of tag
2045			// pairs
2046			$order = [
2047				Tag::END_TAG          => 0,
2048				Tag::SELF_CLOSING_TAG => 1,
2049				Tag::START_TAG        => 2
2050			];
2051			$lenOrder = $order[$tag->getType()];
2052		}
2053
2054		return sprintf('%8x%d%8x%d%8x%8x', $tag->getPos(), $prioFlag, $prio, $lenFlag, $lenOrder, $tagIndex);
2055	}
2056}