1<?php
2declare( strict_types = 1 );
3
4namespace Wikimedia\Parsoid\Config;
5
6use DOMDocument;
7use DOMElement;
8use DOMNode;
9use Wikimedia\Parsoid\Core\ContentModelHandler;
10use Wikimedia\Parsoid\Core\ResourceLimitExceededException;
11use Wikimedia\Parsoid\Logger\ParsoidLogger;
12use Wikimedia\Parsoid\Parsoid;
13use Wikimedia\Parsoid\Tokens\Token;
14use Wikimedia\Parsoid\Utils\DataBag;
15use Wikimedia\Parsoid\Utils\DOMCompat;
16use Wikimedia\Parsoid\Utils\DOMUtils;
17use Wikimedia\Parsoid\Utils\Title;
18use Wikimedia\Parsoid\Utils\TitleException;
19use Wikimedia\Parsoid\Utils\TitleNamespace;
20use Wikimedia\Parsoid\Utils\TokenUtils;
21use Wikimedia\Parsoid\Utils\Utils;
22use Wikimedia\Parsoid\Wt2Html\Frame;
23use Wikimedia\Parsoid\Wt2Html\PageConfigFrame;
24use Wikimedia\Parsoid\Wt2Html\ParserPipelineFactory;
25use Wikimedia\Parsoid\Wt2Html\TT\Sanitizer;
26
27// phpcs:disable MediaWiki.Commenting.FunctionComment.MissingDocumentationPublic
28
29/**
30 * Environment/Envelope class for Parsoid
31 *
32 * Carries around the SiteConfig and PageConfig during an operation
33 * and provides certain other services.
34 */
35class Env {
36
37	/** @var SiteConfig */
38	private $siteConfig;
39
40	/** @var PageConfig */
41	private $pageConfig;
42
43	/** @var DataAccess */
44	private $dataAccess;
45
	/**
	 * The top-level frame for this conversion.  This largely wraps the
	 * PageConfig.
	 *
	 * In the future we may replace PageConfig with the Frame, and add
	 * a `currentFrame` property (relocated from TokenTransformManager).
	 *
	 * @var Frame
	 */
54	public $topFrame;
55	// XXX In the future, perhaps replace PageConfig with the Frame, and
56	// add $this->currentFrame (relocated from TokenTransformManager) if/when
57	// we've removed async parsing.
58
59	/**
60	 * @var bool Are data accesses disabled?
61	 *
	 * FIXME: This can probably be moved to a NoDataAccess instance, rather than
63	 * being an explicit mode of Parsoid.  See T229469
64	 */
65	private $noDataAccess;
66
67	/**
68	 * @var bool Are we using native template expansion?
69	 *
70	 * Parsoid implements native template expansion, which is currently
71	 * only used during parser tests; in production, template expansion
72	 * is done via MediaWiki's legacy preprocessor.
73	 *
74	 * FIXME: Hopefully this distinction can be removed when we're entirely
75	 * in PHP land.
76	 */
77	private $nativeTemplateExpansion;
78
79	/** @phan-var array<string,int> */
80	private $wt2htmlUsage = [];
81
82	/** @phan-var array<string,int> */
83	private $html2wtUsage = [];
84
85	/** @var DOMDocument[] */
86	private $liveDocs = [];
87
88	/** @var bool */
89	private $wrapSections = true;
90
91	/** @var string */
92	private $requestOffsetType = 'byte';
93
94	/** @var string */
95	private $currentOffsetType = 'byte';
96
97	/** @var array<string,mixed> */
98	private $behaviorSwitches = [];
99
100	/**
101	 * Maps fragment id to the fragment forest (array of DOMNodes).
102	 * @var array<string,DOMNode[]>
103	 */
104	private $fragmentMap = [];
105
106	/**
107	 * @var int used to generate fragment ids as needed during parse
108	 */
109	private $fid = 1;
110
111	/** @var int used to generate uids as needed during this parse */
112	private $uid = 1;
113
114	/** @var array[] Lints recorded */
115	private $lints = [];
116
117	/** @var bool logLinterData */
118	public $logLinterData = false;
119
120	/** @var bool[] */
121	private $traceFlags;
122
123	/** @var bool[] */
124	private $dumpFlags;
125
126	/** @var bool[] */
127	private $debugFlags;
128
129	/** @var ParsoidLogger */
130	private $parsoidLogger;
131
132	/** @var float */
133	public $startTime;
134
135	/** @var bool */
136	private $scrubWikitext = false;
137
138	/**
139	 * The default content version that Parsoid assumes it's serializing or
140	 * updating in the pb2pb endpoints
141	 *
142	 * @var string
143	 */
144	private $inputContentVersion;
145
146	/**
147	 * The default content version that Parsoid will generate.
148	 *
149	 * @var string
150	 */
151	private $outputContentVersion;
152
153	/**
154	 * If non-null, the language variant used for Parsoid HTML;
155	 * we convert to this if wt2html, or from this if html2wt.
	 * @var string|null
157	 */
158	private $htmlVariantLanguage;
159
160	/**
161	 * If non-null, the language variant to be used for wikitext.
162	 * If null, heuristics will be used to identify the original wikitext variant
163	 * in wt2html mode, and in html2wt mode new or edited HTML will be left unconverted.
	 * @var string|null
165	 */
166	private $wtVariantLanguage;
167
168	/** @var ParserPipelineFactory */
169	private $pipelineFactory;
170
171	/**
172	 * FIXME Used in DedupeStyles::dedupe()
173	 * @var array
174	 */
175	public $styleTagKeys = [];
176
177	/** @var bool */
178	public $pageBundle = false;
179
180	/** @var bool */
181	public $discardDataParsoid = false;
182
	/** @var DOMElement|null */
184	private $origDOM;
185
	/** @var DOMDocument|null */
187	private $domDiff;
188
189	/**
190	 * Page properties (module resources primarily) that need to be output
191	 * @var array
192	 */
193	private $outputProps = [];
194
195	/**
196	 * PORT-FIXME: public currently
197	 * Cache of wikitext source for a title
198	 * @var array
199	 */
200	public $pageCache = [];
201
202	/**
203	 * PORT-FIXME: public currently
204	 * HTML Cache of expanded transclusions to support
205	 * reusing expansions from HTML of previous revision.
206	 * @var array
207	 */
208	public $transclusionCache = [];
209
210	/**
211	 * PORT-FIXME: public currently
	 * HTML Cache of expanded media wikitext to support
213	 * reusing expansions from HTML of previous revision.
214	 * @var array
215	 */
216	public $mediaCache = [];
217
218	/**
219	 * PORT-FIXME: public currently
220	 * HTML Cache of expanded extension tags to support
221	 * reusing expansions from HTML of previous revision.
222	 * @var array
223	 */
224	public $extensionCache = [];
225
226	/**
227	 * @param SiteConfig $siteConfig
228	 * @param PageConfig $pageConfig
229	 * @param DataAccess $dataAccess
230	 * @param array|null $options
231	 *  - wrapSections: (bool) Whether `<section>` wrappers should be added.
232	 *  - pageBundle: (bool) Sets ids on nodes and stores data-* attributes in a JSON blob.
233	 *  - scrubWikitext: (bool) Indicates emit "clean" wikitext.
234	 *  - traceFlags: (array) Flags indicating which components need to be traced
235	 *  - dumpFlags: (bool[]) Dump flags
236	 *  - debugFlags: (bool[]) Debug flags
237	 *  - noDataAccess: boolean
238	 *  - nativeTemplateExpansion: boolean
239	 *  - discardDataParsoid: boolean
240	 *  - offsetType: 'byte' (default), 'ucs2', 'char'
241	 *                See `Parsoid\Wt2Html\PP\Processors\ConvertOffsets`.
242	 *  - logLinterData: (bool) Should we log linter data if linting is enabled?
243	 *  - htmlVariantLanguage: string|null
244	 *      If non-null, the language variant used for Parsoid HTML;
245	 *      we convert to this if wt2html, or from this if html2wt.
246	 *  - wtVariantLanguage: string|null
247	 *      If non-null, the language variant to be used for wikitext.
248	 *      If null, heuristics will be used to identify the original
249	 *      wikitext variant in wt2html mode, and in html2wt mode new
250	 *      or edited HTML will be left unconverted.
251	 *  - logLevels: (string[]) Levels to log
252	 */
253	public function __construct(
254		SiteConfig $siteConfig, PageConfig $pageConfig, DataAccess $dataAccess, array $options = null
255	) {
256		$options = $options ?? [];
257		$this->siteConfig = $siteConfig;
258		$this->pageConfig = $pageConfig;
259		$this->dataAccess = $dataAccess;
260		$this->topFrame = new PageConfigFrame( $this, $pageConfig, $siteConfig );
261		if ( isset( $options['scrubWikitext'] ) ) {
262			$this->scrubWikitext = !empty( $options['scrubWikitext'] );
263		}
264		if ( isset( $options['wrapSections'] ) ) {
265			$this->wrapSections = !empty( $options['wrapSections'] );
266		}
267		if ( isset( $options['pageBundle'] ) ) {
268			$this->pageBundle = !empty( $options['pageBundle'] );
269		}
270		$this->pipelineFactory = new ParserPipelineFactory( $this );
271		$defaultContentVersion = Parsoid::defaultHTMLVersion();
272		$this->inputContentVersion = $options['inputContentVersion'] ?? $defaultContentVersion;
273		// FIXME: We should have a check for the supported input content versions as well.
274		// That will require a separate constant.
275		$this->outputContentVersion = $options['outputContentVersion'] ?? $defaultContentVersion;
276		if ( !in_array( $this->outputContentVersion, Parsoid::AVAILABLE_VERSIONS, true ) ) {
277			throw new \UnexpectedValueException(
278				$this->outputContentVersion . ' is not an available content version.' );
279		}
280		$this->htmlVariantLanguage = $options['htmlVariantLanguage'] ?? null;
281		$this->wtVariantLanguage = $options['wtVariantLanguage'] ?? null;
282		$this->noDataAccess = !empty( $options['noDataAccess'] );
283		$this->nativeTemplateExpansion = !empty( $options['nativeTemplateExpansion'] );
284		$this->discardDataParsoid = !empty( $options['discardDataParsoid'] );
285		$this->requestOffsetType = $options['offsetType'] ?? 'byte';
286		$this->logLinterData = !empty( $options['logLinterData'] );
287		$this->traceFlags = $options['traceFlags'] ?? [];
288		$this->dumpFlags = $options['dumpFlags'] ?? [];
289		$this->debugFlags = $options['debugFlags'] ?? [];
290		$this->parsoidLogger = new ParsoidLogger( $this->siteConfig->getLogger(), [
291			'logLevels' => $options['logLevels'] ?? [ 'fatal', 'error', 'warn', 'info' ],
292			'debugFlags' => $this->debugFlags,
293			'dumpFlags' => $this->dumpFlags,
294			'traceFlags' => $this->traceFlags
295		] );
296	}
297
298	/**
299	 * @return bool
300	 */
301	public function hasTraceFlags(): bool {
302		return !empty( $this->traceFlags );
303	}
304
305	/**
306	 * Test which trace information to log
307	 *
308	 * @param string $flag Flag name.
309	 * @return bool
310	 */
311	public function hasTraceFlag( string $flag ): bool {
312		return isset( $this->traceFlags[$flag] );
313	}
314
315	/**
316	 * @return bool
317	 */
318	public function hasDumpFlags(): bool {
319		return !empty( $this->dumpFlags );
320	}
321
322	/**
323	 * Test which state to dump
324	 *
325	 * @param string $flag Flag name.
326	 * @return bool
327	 */
328	public function hasDumpFlag( string $flag ): bool {
329		return isset( $this->dumpFlags[$flag] );
330	}
331
332	/**
333	 * Get the site config
334	 * @return SiteConfig
335	 */
336	public function getSiteConfig(): SiteConfig {
337		return $this->siteConfig;
338	}
339
340	/**
341	 * Get the page config
342	 * @return PageConfig
343	 */
344	public function getPageConfig(): PageConfig {
345		return $this->pageConfig;
346	}
347
348	/**
349	 * Get the data access object
350	 * @return DataAccess
351	 */
352	public function getDataAccess(): DataAccess {
353		return $this->dataAccess;
354	}
355
356	public function noDataAccess(): bool {
357		return $this->noDataAccess;
358	}
359
360	public function nativeTemplateExpansionEnabled(): bool {
361		return $this->nativeTemplateExpansion;
362	}
363
364	/**
365	 * Get the current uid counter value
366	 * @return int
367	 */
368	public function getUID(): int {
369		return $this->uid;
370	}
371
372	/**
373	 * Get the current fragment id counter value
374	 * @return int
375	 */
376	public function getFID(): int {
377		return $this->fid;
378	}
379
380	/**
381	 * Whether `<section>` wrappers should be added.
382	 * @todo Does this actually belong here? Should it be a behavior switch?
383	 * @return bool
384	 */
385	public function getWrapSections(): bool {
386		return $this->wrapSections;
387	}
388
389	public function getPipelineFactory(): ParserPipelineFactory {
390		return $this->pipelineFactory;
391	}
392
393	/**
394	 * Return the external format of character offsets in source ranges.
395	 * Internally we always keep DomSourceRange and SourceRange information
396	 * as UTF-8 byte offsets for efficiency (matches the native string
397	 * representation), but for external use we can convert these to
398	 * other formats when we output wt2html or input for html2wt.
399	 *
400	 * @see Parsoid\Wt2Html\PP\Processors\ConvertOffsets
401	 * @return string 'byte', 'ucs2', or 'char'
402	 */
403	public function getRequestOffsetType(): string {
404		return $this->requestOffsetType;
405	}
406
407	/**
408	 * Return the current format of character offsets in source ranges.
409	 * This allows us to track whether the internal byte offsets have
410	 * been converted to the external format (as returned by
411	 * `getRequestOffsetType`) yet.
412	 *
413	 * @see Parsoid\Wt2Html\PP\Processors\ConvertOffsets
414	 * @return string 'byte', 'ucs2', or 'char'
415	 */
416	public function getCurrentOffsetType(): string {
417		return $this->currentOffsetType;
418	}
419
420	/**
421	 * Update the current offset type. Only
422	 * Parsoid\Wt2Html\PP\Processors\ConvertOffsets should be doing this.
423	 * @param string $offsetType 'byte', 'ucs2', or 'char'
424	 */
425	public function setCurrentOffsetType( string $offsetType ) {
426		$this->currentOffsetType = $offsetType;
427	}
428
429	/**
430	 * Resolve strings that are page-fragments or subpage references with
431	 * respect to the current page name.
432	 *
433	 * TODO: Handle namespaces relative links like [[User:../../]] correctly, they
434	 * shouldn't be treated like links at all.
435	 *
436	 * @param string $str Page fragment or subpage reference. Not URL encoded.
437	 * @param bool $resolveOnly If true, only trim and add the current title to
438	 *  lone fragments. TODO: This parameter seems poorly named.
439	 * @return string Resolved title
440	 */
441	public function resolveTitle( string $str, bool $resolveOnly = false ): string {
442		$origName = $str;
443		$str = trim( $str ); // PORT-FIXME: Care about non-ASCII whitespace?
444
445		$pageConfig = $this->getPageConfig();
446
447		// Resolve lonely fragments (important if the current page is a subpage,
448		// otherwise the relative link will be wrong)
449		if ( $str !== '' && $str[0] === '#' ) {
450			$str = $pageConfig->getTitle() . $str;
451		}
452
453		// Default return value
454		$titleKey = $str;
455		if ( $this->getSiteConfig()->namespaceHasSubpages( $pageConfig->getNs() ) ) {
456			// Resolve subpages
457			$reNormalize = false;
458			if ( preg_match( '!^(?:\.\./)+!', $str, $relUp ) ) {
459				$levels = strlen( $relUp[0] ) / 3;  // Levels are indicated by '../'.
460				$titleBits = explode( '/', $pageConfig->getTitle() );
461				if ( count( $titleBits ) <= $levels ) {
462					// Too many levels -- invalid relative link
463					return $origName;
464				}
465				$newBits = array_slice( $titleBits, 0, -$levels );
466				if ( $str !== $relUp[0] ) {
467					$newBits[] = substr( $str, $levels * 3 );
468				}
469				$str = implode( '/', $newBits );
470				$reNormalize = true;
471			} elseif ( $str !== '' && $str[0] === '/' ) {
472				// Resolve absolute subpage links
473				$str = $pageConfig->getTitle() . $str;
474				$reNormalize = true;
475			}
476
477			if ( $reNormalize && !$resolveOnly ) {
478				// Remove final slashes if present.
479				// See https://gerrit.wikimedia.org/r/173431
480				$str = rtrim( $str, '/' );
481				$titleKey = (string)$this->normalizedTitleKey( $str );
482			}
483		}
484
485		// Strip leading ':'
486		if ( $titleKey !== '' && $titleKey[0] === ':' && !$resolveOnly ) {
487			$titleKey = substr( $titleKey, 1 );
488		}
489		return $titleKey;
490	}
491
492	/**
493	 * Convert a Title to a string
494	 * @param Title $title
495	 * @param bool $ignoreFragment
496	 * @return string
497	 */
498	private function titleToString( Title $title, bool $ignoreFragment = false ): string {
499		$ret = $title->getPrefixedDBKey();
500		if ( !$ignoreFragment ) {
501			$fragment = $title->getFragment() ?? '';
502			if ( $fragment !== '' ) {
503				$ret .= '#' . $fragment;
504			}
505		}
506		return $ret;
507	}
508
509	/**
510	 * Get normalized title key for a title string.
511	 *
512	 * @param string $str Should be in url-decoded format.
513	 * @param bool $noExceptions Return null instead of throwing exceptions.
514	 * @param bool $ignoreFragment Ignore the fragment, if any.
515	 * @return string|null Normalized title key for a title string (or null for invalid titles).
516	 */
517	public function normalizedTitleKey(
518		string $str, bool $noExceptions = false, bool $ignoreFragment = false
519	): ?string {
520		$title = $this->makeTitleFromURLDecodedStr( $str, 0, $noExceptions );
521		if ( !$title ) {
522			return null;
523		}
524		return $this->titleToString( $title, $ignoreFragment );
525	}
526
527	/**
528	 * Normalize and resolve the page title
529	 * @deprecated Just use $this->getPageConfig()->getTitle() directly
530	 * @return string
531	 */
532	public function normalizeAndResolvePageTitle(): string {
533		return $this->getPageConfig()->getTitle();
534	}
535
536	/**
537	 * Create a Title object
538	 * @param string $text URL-decoded text
539	 * @param int|TitleNamespace $defaultNs
540	 * @param bool $noExceptions
541	 * @return Title|null
542	 */
543	private function makeTitle( string $text, $defaultNs = 0, bool $noExceptions = false ): ?Title {
544		try {
545			if ( preg_match( '!^(?:[#/]|\.\./)!', $text ) ) {
546				$defaultNs = $this->getPageConfig()->getNs();
547			}
548			$text = $this->resolveTitle( $text );
549			return Title::newFromText( $text, $this->getSiteConfig(), $defaultNs );
550		} catch ( TitleException $e ) {
551			if ( $noExceptions ) {
552				return null;
553			}
554			throw $e;
555		}
556	}
557
558	/**
559	 * Create a Title object
560	 * @see Title::newFromURL in MediaWiki
561	 * @param string $str URL-encoded text
562	 * @param int|TitleNamespace $defaultNs
563	 * @param bool $noExceptions
564	 * @return Title|null
565	 */
566	public function makeTitleFromText(
567		string $str, $defaultNs = 0, bool $noExceptions = false
568	): ?Title {
569		return $this->makeTitle( Utils::decodeURIComponent( $str ), $defaultNs, $noExceptions );
570	}
571
572	/**
573	 * Create a Title object
574	 * @see Title::newFromText in MediaWiki
575	 * @param string $str URL-decoded text
576	 * @param int|TitleNamespace $defaultNs
577	 * @param bool $noExceptions
578	 * @return Title|null
579	 */
580	public function makeTitleFromURLDecodedStr(
581		string $str, $defaultNs = 0, bool $noExceptions = false
582	): ?Title {
583		return $this->makeTitle( $str, $defaultNs, $noExceptions );
584	}
585
586	/**
587	 * Make a link to a Title
588	 * @param Title $title
589	 * @return string
590	 */
591	public function makeLink( Title $title ): string {
592		return Sanitizer::sanitizeTitleURI(
593			$this->getSiteConfig()->relativeLinkPrefix() . $this->titleToString( $title ),
594			false
595		);
596	}
597
598	/**
599	 * Test if an href attribute value could be a valid link target
600	 * @param string|(Token|string)[] $href
601	 * @return bool
602	 */
603	public function isValidLinkTarget( $href ): bool {
604		$href = TokenUtils::tokensToString( $href );
605
606		// decode percent-encoding so that we can reliably detect
607		// bad page title characters
608		$hrefToken = Utils::decodeURIComponent( $href );
609		return $this->normalizedTitleKey( $this->resolveTitle( $hrefToken, true ), true ) !== null;
610	}
611
612	/**
613	 * Generate a new uid
614	 * @return int
615	 */
616	public function generateUID(): int {
617		return $this->uid++;
618	}
619
620	/**
621	 * Generate a new object id
622	 * @return string
623	 */
624	public function newObjectId(): string {
625		return "mwt" . $this->generateUID();
626	}
627
628	/**
629	 * Generate a new about id
630	 * @return string
631	 */
632	public function newAboutId(): string {
633		return "#" . $this->newObjectId();
634	}
635
636	/**
637	 * Store reference to original DOM (body)
638	 * @param DOMElement $domBody
639	 */
640	public function setOrigDOM( DOMElement $domBody ): void {
641		$this->origDOM = $domBody;
642	}
643
644	/**
645	 * Return reference to original DOM (body)
646	 * @return DOMElement
647	 */
648	public function getOrigDOM(): DOMElement {
649		return $this->origDOM;
650	}
651
652	/**
653	 * Store reference to DOM diff document
654	 * @param DOMDocument $doc
655	 */
656	public function setDOMDiff( $doc ): void {
657		$this->domDiff = $doc;
658	}
659
660	/**
661	 * Return reference to DOM diff document
662	 * @return DOMDocument|null
663	 */
664	public function getDOMDiff(): ?DOMDocument {
665		return $this->domDiff;
666	}
667
668	/**
669	 * Generate a new fragment id
670	 * @return string
671	 */
672	public function newFragmentId(): string {
673		return "mwf" . (string)$this->fid++;
674	}
675
676	/**
677	 * FIXME: This function could be given a better name to reflect what it does.
678	 *
679	 * @param DOMDocument $doc
680	 * @param DataBag|null $bag
681	 */
682	public function referenceDataObject( DOMDocument $doc, ?DataBag $bag = null ): void {
683		// `bag` is a deliberate dynamic property; see DOMDataUtils::getBag()
684		// @phan-suppress-next-line PhanUndeclaredProperty dynamic property
685		$doc->bag = $bag ?? new DataBag();
686
687		// Prevent GC from collecting the PHP wrapper around the libxml doc
688		$this->liveDocs[] = $doc;
689	}
690
691	/**
692	 * @param string $html
693	 * @param bool $validateXMLNames
694	 * @return DOMDocument
695	 */
696	public function createDocument(
697		string $html = '', bool $validateXMLNames = false
698	): DOMDocument {
699		$doc = DOMUtils::parseHTML( $html, $validateXMLNames );
700		// Cache the head and body.
701		DOMCompat::getHead( $doc );
702		DOMCompat::getBody( $doc );
703		$this->referenceDataObject( $doc );
704		return $doc;
705	}
706
707	/**
708	 * BehaviorSwitchHandler support function that adds a property named by
709	 * $variable and sets it to $state
710	 *
711	 * @deprecated Use setBehaviorSwitch() instead.
712	 * @param string $variable
713	 * @param mixed $state
714	 */
715	public function setVariable( string $variable, $state ): void {
716		$this->setBehaviorSwitch( $variable, $state );
717	}
718
719	/**
720	 * Record a behavior switch.
721	 *
722	 * @todo Does this belong here, or on some equivalent to MediaWiki's ParserOutput?
723	 * @param string $switch Switch name
724	 * @param mixed $state Relevant state data to record
725	 */
726	public function setBehaviorSwitch( string $switch, $state ): void {
727		$this->behaviorSwitches[$switch] = $state;
728	}
729
730	/**
731	 * Fetch the state of a previously-recorded behavior switch.
732	 *
733	 * @todo Does this belong here, or on some equivalent to MediaWiki's ParserOutput?
734	 * @param string $switch Switch name
735	 * @param mixed|null $default Default value if the switch was never set
736	 * @return mixed State data that was previously passed to setBehaviorSwitch(), or $default
737	 */
738	public function getBehaviorSwitch( string $switch, $default = null ) {
739		return $this->behaviorSwitches[$switch] ?? $default;
740	}
741
742	/**
743	 * @return array<string,DOMNode[]>
744	 */
745	public function getDOMFragmentMap(): array {
746		return $this->fragmentMap;
747	}
748
749	/**
750	 * @param string $id Fragment id
751	 * @return DOMNode[]
752	 */
753	public function getDOMFragment( string $id ): array {
754		return $this->fragmentMap[$id];
755	}
756
757	/**
758	 * @param string $id Fragment id
759	 * @param DOMNode[] $forest DOM forest (contiguous array of DOM trees)
760	 *   to store against the fragment id
761	 */
762	public function setDOMFragment( string $id, array $forest ): void {
763		$this->fragmentMap[$id] = $forest;
764	}
765
766	/**
767	 * Record a lint
768	 * @param string $type Lint type key
769	 * @param array $lintData Data for the lint.
770	 *  - dsr: (SourceRange)
771	 *  - params: (array)
772	 *  - templateInfo: (array|null)
773	 */
774	public function recordLint( string $type, array $lintData ): void {
775		// Parsoid-JS tests don't like getting null properties where JS had undefined.
776		$lintData = array_filter( $lintData, function ( $v ) {
777			return $v !== null;
778		} );
779
780		if ( empty( $lintData['dsr'] ) ) {
781			$this->log( 'error/lint', "Missing DSR; msg=", $lintData );
782			return;
783		}
784
785		// This will always be recorded as a native 'byte' offset
786		$lintData['dsr'] = $lintData['dsr']->jsonSerialize();
787
788		// Ensure a "params" array
789		if ( !isset( $lintData['params'] ) ) {
790			$lintData['params'] = [];
791		}
792
793		$this->lints[] = [ 'type' => $type ] + $lintData;
794	}
795
796	/**
797	 * Retrieve recorded lints
798	 * @return array[]
799	 */
800	public function getLints(): array {
801		return $this->lints;
802	}
803
804	/**
805	 * Init lints to the passed array.
806	 *
807	 * FIXME: This is currently needed to reset lints after converting
808	 * DSR offsets because of ordering of DOM passes. So, in reality,
809	 * there should be no real use case for setting this anywhere else
810	 * but from that single callsite.
811	 *
812	 * @param array $lints
813	 */
814	public function setLints( array $lints ): void {
815		$this->lints = $lints;
816	}
817
818	/**
819	 * @param mixed ...$args
820	 */
821	public function log( ...$args ): void {
822		$this->parsoidLogger->log( ...$args );
823	}
824
825	/**
826	 * Update a profile timer.
827	 *
828	 * @param string $resource
829	 * @param mixed $time
830	 * @param mixed $cat
831	 */
832	public function bumpTimeUse( string $resource, $time, $cat ): void {
833		// --trace ttm:* trip on this if we throw an exception
834		// throw new \BadMethodCallException( 'not yet ported' );
835	}
836
837	/**
838	 * Update a profile counter.
839	 *
840	 * @param string $resource
841	 * @param int $n The amount to increment the counter; defaults to 1.
842	 */
843	public function bumpCount( string $resource, int $n = 1 ): void {
844		throw new \BadMethodCallException( 'not yet ported' );
845	}
846
847	/**
848	 * Bump usage of some limited parser resource
849	 * (ex: tokens, # transclusions, # list items, etc.)
850	 *
851	 * @param string $resource
852	 * @param int $count How much of the resource is used?
853	 * @throws ResourceLimitExceededException
854	 */
855	public function bumpWt2HtmlResourceUse( string $resource, int $count = 1 ): void {
856		$n = $this->wt2htmlUsage[$resource] ?? 0;
857		$n += $count;
858		$this->wt2htmlUsage[$resource] = $n;
859		$wt2htmlLimits = $this->siteConfig->getWt2HtmlLimits();
860		if (
861			isset( $wt2htmlLimits[$resource] ) &&
862			$n > $wt2htmlLimits[$resource]
863		) {
864			// TODO: re-evaluate whether throwing an exception is really
865			// the right failure strategy when Parsoid is integrated into MW
866			// (T221238)
867			throw new ResourceLimitExceededException( "wt2html: $resource limit exceeded: $n" );
868		}
869	}
870
871	/**
872	 * Bump usage of some limited serializer resource
873	 * (ex: html size)
874	 *
875	 * @param string $resource
876	 * @param int $count How much of the resource is used? (defaults to 1)
877	 * @throws ResourceLimitExceededException
878	 */
879	public function bumpHtml2WtResourceUse( string $resource, int $count = 1 ): void {
880		$n = $this->html2wtUsage[$resource] ?? 0;
881		$n += $count;
882		$this->html2wtUsage[$resource] = $n;
883		$html2wtLimits = $this->siteConfig->getHtml2WtLimits();
884		if (
885			isset( $html2wtLimits[$resource] ) &&
886			$n > $html2wtLimits[$resource]
887		) {
888			throw new ResourceLimitExceededException( "html2wt: $resource limit exceeded: $n" );
889		}
890	}
891
892	/**
893	 * Get an appropriate content handler, given a contentmodel.
894	 *
895	 * @param string|null &$contentmodel An optional content model which
896	 *   will override whatever the source specifies.  It gets set to the
897	 *   handler which is used.
898	 * @return ContentModelHandler An appropriate content handler
899	 */
900	public function getContentHandler(
901		?string &$contentmodel = null
902	): ContentModelHandler {
903		$contentmodel = $contentmodel ?? $this->pageConfig->getContentModel();
904		$handler = $this->siteConfig->getContentModelHandler( $contentmodel );
905		if ( !$handler ) {
906			$this->log( 'warn', "Unknown contentmodel $contentmodel" );
907			$contentmodel = 'wikitext';
908			$handler = $this->siteConfig->getContentModelHandler( $contentmodel );
909		}
910		return $handler;
911	}
912
913	/**
914	 * Is the language converter enabled on this page?
915	 *
916	 * @return bool
917	 */
918	public function langConverterEnabled(): bool {
919		return $this->siteConfig->langConverterEnabledForLanguage(
920			$this->pageConfig->getPageLanguage()
921		);
922	}
923
924	/**
925	 * Indicates emit "clean" wikitext compared to what we would if we didn't normalize HTML
926	 * @return bool
927	 */
928	public function shouldScrubWikitext(): bool {
929		return $this->scrubWikitext;
930	}
931
932	/**
933	 * The HTML content version of the input document (for html2wt and html2html conversions).
934	 * @see https://www.mediawiki.org/wiki/Parsoid/API#Content_Negotiation
935	 * @see https://www.mediawiki.org/wiki/Specs/HTML/2.1.0#Versioning
936	 * @return string A semver version number
937	 */
938	public function getInputContentVersion(): string {
939		return $this->inputContentVersion;
940	}
941
942	/**
943	 * The HTML content version of the input document (for html2wt and html2html conversions).
944	 * @see https://www.mediawiki.org/wiki/Parsoid/API#Content_Negotiation
945	 * @see https://www.mediawiki.org/wiki/Specs/HTML/2.1.0#Versioning
946	 * @return string A semver version number
947	 */
948	public function getOutputContentVersion(): string {
949		return $this->outputContentVersion;
950	}
951
952	/**
953	 * If non-null, the language variant used for Parsoid HTML; we convert
954	 * to this if wt2html, or from this (if html2wt).
955	 *
956	 * @return string|null
957	 */
958	public function getHtmlVariantLanguage(): ?string {
959		return $this->htmlVariantLanguage;
960	}
961
962	/**
963	 * If non-null, the language variant to be used for wikitext.  If null,
964	 * heuristics will be used to identify the original wikitext variant
965	 * in wt2html mode, and in html2wt mode new or edited HTML will be left
966	 * unconverted.
967	 *
968	 * @return string|null
969	 */
970	public function getWtVariantLanguage(): ?string {
971		return $this->wtVariantLanguage;
972	}
973
974	/**
975	 * Update K=[V1,V2,...] that might need to be output as part of the
976	 * generated HTML.  Ex: module styles, modules scripts, ...
977	 *
978	 * @param string $key
979	 * @param array $value
980	 */
981	public function addOutputProperty( string $key, array $value ): void {
982		if ( !isset( $this->outputProps[$key] ) ) {
983			$this->outputProps[$key] = [];
984		}
985		$this->outputProps[$key] = array_merge( $this->outputProps[$key], $value );
986	}
987
988	/**
989	 * @return array
990	 */
991	public function getOutputProperties(): array {
992		return $this->outputProps;
993	}
994
995	/**
996	 * Determine appropriate vary headers for the HTML form of this page.
997	 * @return string
998	 */
999	public function htmlVary(): string {
1000		$varies = [ 'Accept' ]; // varies on Content-Type
1001		if ( $this->langConverterEnabled() ) {
1002			$varies[] = 'Accept-Language';
1003		}
1004
1005		sort( $varies );
1006		return implode( ', ', $varies );
1007	}
1008
1009	/**
1010	 * Determine an appropriate content-language for the HTML form of this page.
1011	 * @return string
1012	 */
1013	public function htmlContentLanguage(): string {
1014		// PageConfig::htmlVariant is set iff we do variant conversion on the
1015		// HTML
1016		return $this->pageConfig->getVariant() ??
1017			$this->pageConfig->getPageLanguage();
1018	}
1019}
1020