1<?php
2
3namespace Wikimedia\Purtle;
4
5use LogicException;
6
7/**
8 * RdfWriter implementation for generating JSON-LD output.
9 *
10 * @license GPL-2.0-or-later
11 * @author C. Scott Ananian
12 */
class JsonLdRdfWriter extends RdfWriterBase {

	/**
	 * The JSON-LD "@context", which maps terms to IRIs. This is shared with all sub-writers, and a
	 * single context is emitted when the writer is finalized.
	 *
	 * Prefix entries map a prefix name to its IRI (a string); term entries map a term to an
	 * array which may hold "@id" and/or "@type".
	 *
	 * @see https://www.w3.org/TR/json-ld/#the-context
	 *
	 * @var array
	 */
	protected $context = [];

	/**
	 * A set of predicates which rely on the default typing rules for
	 * JSON-LD; that is, values for the predicate have been emitted which
	 * would be broken if an explicit "@type" was added to the context
	 * for the predicate.
	 *
	 * @var bool[]
	 */
	protected $defaulted = [];

	/**
	 * The JSON-LD "@graph", which lists all the nodes described by this JSON-LD object.
	 * We apply an optimization eliminating the "@graph" entry if it consists
	 * of a single node; in that case we will set $this->graph to null in
	 * #finishJson() to ensure that the deferred callback in #finishDocument()
	 * doesn't later emit "@graph".
	 *
	 * @see https://www.w3.org/TR/json-ld/#named-graphs
	 *
	 * @var array[]|null
	 */
	private $graph = [];

	/**
	 * A collection of predicates about a specific subject.  The
	 * subject is identified by the "@id" key in this array; the other
	 * keys identify JSON-LD properties.
	 *
	 * @see https://www.w3.org/TR/json-ld/#dfn-edge
	 *
	 * @var array
	 */
	private $predicates = [];

	/**
	 * A sequence of zero or more IRIs, nodes, or values, which are the
	 * destination targets of the current predicates.
	 *
	 * @see https://www.w3.org/TR/json-ld/#dfn-list
	 *
	 * @var array
	 */
	private $values = [];

	/**
	 * True iff we have written the opening of the "@graph" field.
	 *
	 * @var bool
	 */
	private $wroteGraph = false;

	/**
	 * JSON-LD objects describing a single node can omit the "@graph" field;
	 * this variable remains false only so long as we can guarantee that
	 * only a single node has been described.
	 *
	 * @var bool
	 */
	private $disableGraphOpt = false;

	/**
	 * The IRI for the RDF `type` property.
	 */
	const RDF_TYPE_IRI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';

	/**
	 * The type internally used for "default type", which is a string or
	 * otherwise default-coerced type.
	 */
	const DEFAULT_TYPE = '@purtle@default@';

	/**
	 * Set up the writer and register the JSON-LD specific callbacks in the
	 * state transition table.
	 *
	 * @param string $role
	 * @param BNodeLabeler|null $labeler
	 */
	public function __construct( $role = parent::DOCUMENT_ROLE, BNodeLabeler $labeler = null ) {
		parent::__construct( $role, $labeler );

		// The following named methods are protected, not private, so we
		// can invoke them directly w/o function wrappers.
		$this->transitionTable[self::STATE_START][self::STATE_DOCUMENT] =
			[ $this, 'beginJson' ];
		$this->transitionTable[self::STATE_DOCUMENT][self::STATE_FINISH] =
			[ $this, 'finishJson' ];
		$this->transitionTable[self::STATE_OBJECT][self::STATE_PREDICATE] =
			[ $this, 'finishPredicate' ];
		$this->transitionTable[self::STATE_OBJECT][self::STATE_SUBJECT] =
			[ $this, 'finishSubject' ];
		$this->transitionTable[self::STATE_OBJECT][self::STATE_DOCUMENT] =
			[ $this, 'finishDocument' ];
	}

	/**
	 * Emit $val as JSON, with $indent extra indentations on each line.
	 *
	 * The outermost pair of braces/brackets is stripped, so that the result
	 * can be spliced into an enclosing object or array.
	 *
	 * @param array $val
	 * @param int $indent Number of extra four-space indentation levels.
	 *
	 * @return string the JSON string for $val
	 * @throws \InvalidArgumentException if $val cannot be encoded as JSON
	 */
	public function encode( $val, $indent ) {
		$str = json_encode( $val, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES );
		if ( $str === false ) {
			// Fail loudly: passing `false` on to preg_replace() would silently
			// produce an empty string and drop the data from the output.
			throw new \InvalidArgumentException(
				'Unable to encode value as JSON: ' . json_last_error_msg()
			);
		}

		// Strip outermost open/close braces/brackets
		$str = preg_replace( '/^[[{]\n?|\n?[}\]]$/', '', $str );

		if ( $indent > 0 ) {
			// add extra indentation
			$str = preg_replace( '/^/m', str_repeat( '    ', $indent ), $str );
		}

		return $str;
	}

	/**
	 * Return a "compact IRI" corresponding to the given base/local pair.
	 * This adds entries to the "@context" key when needed to allow use
	 * of a given prefix.
	 * @see https://www.w3.org/TR/json-ld/#dfn-compact-iri
	 *
	 * @param string $base A QName prefix if $local is given, or an IRI if $local is null.
	 * @param string|null $local A QName suffix, or null if $base is an IRI.
	 *
	 * @return string A compact IRI, or a full IRI when the prefix cannot be
	 *  used in the context (empty prefix, or a name conflict).
	 */
	private function compactify( $base, $local = null ) {
		$this->expandShorthand( $base, $local );

		if ( $local === null ) {
			return $base;
		}

		if ( $base !== '_' && isset( $this->prefixes[ $base ] ) ) {
			if ( $base === '' ) {
				// Empty prefix not supported; use full IRI
				return $this->prefixes[ $base ] . $local;
			}
			if ( !isset( $this->context[ $base ] ) ) {
				$this->context[ $base ] = $this->prefixes[ $base ];
			}
			if ( $this->context[ $base ] !== $this->prefixes[ $base ] ) {
				// Context name conflict; use full IRI
				return $this->prefixes[ $base ] . $local;
			}
		}

		return $base . ':' . $local;
	}

	/**
	 * Return an absolute IRI from the given base/local pair.
	 * @see https://www.w3.org/TR/json-ld/#dfn-absolute-iri
	 *
	 * @param string $base A QName prefix if $local is given, or an IRI if $local is null.
	 * @param string|null $local A QName suffix, or null if $base is an IRI.
	 *
	 * @return string An absolute IRI.
	 * @throws LogicException if the pair still has a local part after
	 *  expansion, i.e. the prefix could not be resolved.
	 */
	private function toIRI( $base, $local ) {
		$this->expandShorthand( $base, $local );
		$this->expandQName( $base, $local );
		if ( $local !== null ) {
			throw new LogicException( 'Unknown prefix: ' . $base );
		}
		return $base;
	}

	/**
	 * Return an appropriate term for the current predicate.
	 *
	 * The RDF `type` predicate is returned as its full IRI. Otherwise the
	 * local name is used as a term when it does not clash with a registered
	 * prefix or with a term already defined differently in the context; in
	 * that case the (compact) IRI is returned instead.
	 *
	 * @return string
	 */
	private function getCurrentTerm() {
		list( $base, $local ) = $this->currentPredicate;
		$predIRI = $this->toIRI( $base, $local );
		if ( $predIRI === self::RDF_TYPE_IRI ) {
			return $predIRI;
		}
		$this->expandShorthand( $base, $local );
		if ( $local === null ) {
			return $base;
		} elseif ( $base !== '_' && !isset( $this->prefixes[ $local ] ) ) {
			// Prefixes get priority over field names in @context
			$pred = $this->compactify( $base, $local );
			if ( !isset( $this->context[ $local ] ) ) {
				$this->context[ $local ] = [ '@id' => $pred ];
			}
			if ( $this->context[ $local ][ '@id' ] === $pred ) {
				return $local;
			}
			// The term is already taken by another predicate.
			return $pred;
		}
		return $this->compactify( $base, $local );
	}

	/**
	 * Write document header.
	 */
	protected function beginJson() {
		if ( $this->role === self::DOCUMENT_ROLE ) {
			$this->write( "{\n" );
			$this->write( function () {
				// If this buffer is drained early, disable @graph optimization
				$this->disableGraphOpt = true;
				return '';
			} );
		}
	}

	/**
	 * Write document footer: the hoisted single node or the end of "@graph",
	 * followed by the "@context" (if any) and the closing brace.
	 */
	protected function finishJson() {
		// Whether anything precedes the "@context" member; decides whether a
		// separating comma is needed.
		$wroteBody = true;

		// If we haven't drained yet, and @graph has only 1 element, then we
		// can optimize our output and hoist the single node to top level.
		if ( $this->role === self::DOCUMENT_ROLE ) {
			if ( ( !$this->disableGraphOpt ) && count( $this->graph ) === 1 ) {
				$this->write( $this->encode( $this->graph[0], 0 ) );
				$this->graph = null; // We're done with @graph.
			} else {
				$this->disableGraphOpt = true;
				if ( $this->wroteGraph || count( $this->graph ) > 0 ) {
					// "@graph" has been opened already, or will be opened by
					// the deferred callback from #finishDocument().
					$this->write( "\n    ]" );
				} else {
					// Nothing was described at all: there is no "@graph"
					// array to close, and closing it anyway would produce
					// invalid JSON.
					$wroteBody = false;
				}
			}
		}

		if ( count( $this->context ) ) {
			// Write @context field.
			if ( $wroteBody ) {
				$this->write( ",\n" );
			}
			$this->write( $this->encode( [
				'@context' => $this->context
			], 0 ) );
		}

		$this->write( "\n}" );
	}

	/**
	 * Finish the current subject and register a deferred callback which
	 * emits the nodes collected so far when the buffer is drained.
	 */
	protected function finishDocument() {
		$this->finishSubject();
		$this->write( function () {
			// if this is drained before finishJson(), then disable
			// the graph optimization and dump what we've got so far.
			$str = '';
			if ( $this->graph !== null && count( $this->graph ) > 0 ) {
				$this->disableGraphOpt = true;
				if ( $this->role === self::DOCUMENT_ROLE && !$this->wroteGraph ) {
					$str .= "    \"@graph\": [\n";
					$this->wroteGraph = true;
				} else {
					$str .= ",\n";
				}
				$str .= $this->encode( $this->graph, 1 );
				$this->graph = [];
				return $str;
			}
			// Delay; maybe we'll be able to optimize this later.
			return $str;
		} );
	}

	/**
	 * Start a new node; any previously collected predicates are replaced.
	 *
	 * @param string $base
	 * @param string|null $local
	 */
	protected function writeSubject( $base, $local = null ) {
		$this->predicates = [
			'@id' => $this->compactify( $base, $local )
		];
	}

	/**
	 * Finish the current predicate and append the node to the graph.
	 */
	protected function finishSubject() {
		$this->finishPredicate();
		$this->graph[] = $this->predicates;
	}

	/**
	 * Nothing is written here; the predicate is resolved from
	 * $this->currentPredicate when its values are finished.
	 *
	 * @param string $base
	 * @param string|null $local
	 */
	protected function writePredicate( $base, $local = null ) {
		// no op
	}

	/**
	 * Add a resource (IRI or blank node) as a value of the current predicate.
	 *
	 * @param string $base
	 * @param string|null $local
	 */
	protected function writeResource( $base, $local = null ) {
		$pred = $this->getCurrentTerm();
		$value = $this->compactify( $base, $local );
		// Values of rdf:type are always expanded; #finishPredicate() relies
		// on finding the "@id" key when it converts them to "@type".
		$this->addTypedValue( '@id', $value, [
			'@id' => $value
		], ( $pred === self::RDF_TYPE_IRI ) );
	}

	/**
	 * Add a text, optionally with a language tag, as a value of the current
	 * predicate. An invalid language code is dropped.
	 *
	 * @param string $text
	 * @param string|null $language
	 */
	protected function writeText( $text, $language = null ) {
		if ( !$this->isValidLanguageCode( $language ) ) {
			$this->addTypedValue( self::DEFAULT_TYPE, $text );
		} else {
			$expanded = [
				'@language' => $language,
				'@value' => $text
			];
			$this->addTypedValue( self::DEFAULT_TYPE, $expanded, $expanded );
		}
	}

	/**
	 * Add a literal as a value of the current predicate.
	 *
	 * xsd:string, xsd:integer, xsd:boolean and xsd:double literals are
	 * emitted as native JSON values, but only when that does not change the
	 * literal; anything else is emitted with an explicit "@type" so that its
	 * lexical form is kept as given.
	 *
	 * @param string $literal
	 * @param string|null $typeBase
	 * @param string|null $typeLocal
	 */
	public function writeValue( $literal, $typeBase, $typeLocal = null ) {
		if ( $typeBase === null && $typeLocal === null ) {
			$this->addTypedValue( self::DEFAULT_TYPE, $literal );
			return;
		}

		switch ( $this->toIRI( $typeBase, $typeLocal ) ) {
			case 'http://www.w3.org/2001/XMLSchema#string':
				$this->addTypedValue( self::DEFAULT_TYPE, strval( $literal ) );
				return;
			case 'http://www.w3.org/2001/XMLSchema#integer':
				$int = intval( $literal );
				// Only use a native number if it round-trips exactly.
				// intval() would otherwise silently clamp values beyond the
				// native integer range and turn malformed input into 0.
				if ( strval( $int ) === strval( $literal ) ) {
					$this->addTypedValue( self::DEFAULT_TYPE, $int );
					return;
				}
				break;
			case 'http://www.w3.org/2001/XMLSchema#boolean':
				if ( is_bool( $literal ) ) {
					$this->addTypedValue( self::DEFAULT_TYPE, $literal );
					return;
				}
				// Other lexical forms (such as "1" and "0") must not be
				// emitted as `false`; they keep their explicit type below.
				if ( $literal === 'true' || $literal === 'false' ) {
					$this->addTypedValue( self::DEFAULT_TYPE, ( $literal === 'true' ) );
					return;
				}
				break;
			case 'http://www.w3.org/2001/XMLSchema#double':
				// Special values like "INF" and "NaN", or malformed input,
				// would be converted to 0 by doubleval().
				if ( is_numeric( $literal ) ) {
					$v = doubleval( $literal );
					// Only "numbers with fractions" are xsd:double.  We need
					// to verify that the JSON string will contain a decimal
					// point, otherwise the value would be interpreted as an
					// xsd:integer.
					// TODO: consider instead using JSON_PRESERVE_ZERO_FRACTION
					// in $this->encode() once our required PHP >= 5.6.6.
					// OTOH, the spec language is ambiguous about whether "5."
					// would be considered an integer or a double.
					if ( is_finite( $v ) && strpos( json_encode( $v ), '.' ) !== false ) {
						$this->addTypedValue( self::DEFAULT_TYPE, $v );
						return;
					}
				}
				break;
		}

		$type = $this->compactify( $typeBase, $typeLocal );
		$literal = strval( $literal );
		$this->addTypedValue( $type, $literal, [
			'@type' => $type,
			'@value' => $literal
		] );
	}

	/**
	 * Add a typed value for the given predicate.  If possible, adds a
	 * default type to the context to avoid having to repeat type information
	 * in each value for this predicate.  If there is already a default
	 * type which conflicts with this one, or if $forceExpand is true,
	 * then use the "expanded" value which will explicitly override any
	 * default type.
	 *
	 * @param string $type The compactified JSON-LD @type for this value, or
	 *  self::DEFAULT_TYPE to indicate the default JSON-LD type coercion rules
	 *  should be used.
	 * @param string|int|float|bool $simpleVal The "simple" representation
	 *  for this value, used if the type can be hoisted into the context.
	 * @param array|null $expandedVal The "expanded" representation for this
	 *  value, used if the context @type conflicts with this value; or null
	 *  to use "@value" for the expanded representation.
	 * @param bool $forceExpand If true, don't try to add this type to the
	 *  context. Defaults to false.
	 */
	protected function addTypedValue( $type, $simpleVal, $expandedVal = null, $forceExpand = false ) {
		if ( !$forceExpand ) {
			$pred = $this->getCurrentTerm();
			if ( $type === self::DEFAULT_TYPE ) {
				if ( !isset( $this->context[ $pred ][ '@type' ] ) ) {
					// From now on no "@type" may be added to the context
					// for this predicate.
					$this->defaulted[ $pred ] = true;
				}
				if ( isset( $this->defaulted[ $pred ] ) ) {
					$this->values[] = $simpleVal;
					return;
				}
			} elseif ( !isset( $this->defaulted[ $pred ] ) ) {
				if ( !isset( $this->context[ $pred ] ) ) {
					$this->context[ $pred ] = [];
				}
				if ( !isset( $this->context[ $pred ][ '@type' ] ) ) {
					$this->context[ $pred ][ '@type' ] = $type;
				}
				if ( $this->context[ $pred ][ '@type' ] === $type ) {
					$this->values[] = $simpleVal;
					return;
				}
			}
		}
		if ( $expandedVal === null ) {
			$this->values[] = [ '@value' => $simpleVal ];
		} else {
			$this->values[] = $expandedVal;
		}
	}

	/**
	 * Store the values collected for the current predicate in the current
	 * node, merging them with any values stored for the same term before.
	 *
	 * @throws LogicException if no value was collected for the predicate
	 */
	protected function finishPredicate() {
		$name = $this->getCurrentTerm();

		if ( $name === self::RDF_TYPE_IRI ) {
			// rdf:type is expressed with the "@type" keyword, which takes
			// plain IRIs rather than node references.
			$name = '@type';
			$this->values = array_map( function ( array $val ) {
				return $val[ '@id' ];
			}, $this->values );
		}
		if ( isset( $this->predicates[$name] ) ) {
			$was = $this->predicates[$name];
			// Wrap $was into a numeric indexed array if it isn't already.
			// Note that $was could have non-numeric indices, eg
			// [ "@id" => "foo" ], in which case it still needs to be wrapped.
			if ( !( is_array( $was ) && isset( $was[0] ) ) ) {
				$was = [ $was ];
			}
			$this->values = array_merge( $was, $this->values );
		}

		$cnt = count( $this->values );
		if ( $cnt === 0 ) {
			throw new LogicException( 'finishPredicate can\'t be called without at least one value' );
		} elseif ( $cnt === 1 ) {
			// A single value is stored without the wrapping array.
			$this->predicates[$name] = $this->values[0];
		} else {
			$this->predicates[$name] = $this->values;
		}

		$this->values = [];
	}

	/**
	 * Create a sub-writer which shares the context and the set of defaulted
	 * predicates with this writer.
	 *
	 * @param string $role
	 * @param BNodeLabeler $labeler
	 *
	 * @return RdfWriterBase
	 */
	protected function newSubWriter( $role, BNodeLabeler $labeler ) {
		$writer = new self( $role, $labeler );

		// Have subwriter share context with this parent.
		$writer->context = &$this->context;
		$writer->defaulted = &$this->defaulted;

		// We can't use the @graph optimization.
		$this->disableGraphOpt = true;

		return $writer;
	}

	/**
	 * @return string a MIME type
	 */
	public function getMimeType() {
		return 'application/ld+json; charset=UTF-8';
	}

}
483