<?php

namespace Wikimedia\Purtle;

use LogicException;

/**
 * RdfWriter implementation for generating JSON-LD output.
 *
 * Triples are not emitted as they arrive. Values are collected per predicate,
 * predicates per subject, and subjects into a "@graph" list; the JSON text is
 * produced when the document is finished or the buffer is drained.
 *
 * @license GPL-2.0-or-later
 * @author C. Scott Ananian
 */
class JsonLdRdfWriter extends RdfWriterBase {

	/**
	 * The JSON-LD "@context", which maps terms to IRIs. This is shared with all sub-writers, and a
	 * single context is emitted when the writer is finalized.
	 *
	 * Entries are either plain strings (prefix => namespace IRI, added by
	 * #compactify()) or arrays holding "@id" and/or "@type" keys (added by
	 * #getCurrentTerm() and #addTypedValue()).
	 *
	 * @see https://www.w3.org/TR/json-ld/#the-context
	 *
	 * @var (string|array)[]
	 */
	protected $context = [];

	/**
	 * A set of predicates which rely on the default typing rules for
	 * JSON-LD; that is, values for the predicate have been emitted which
	 * would be broken if an explicit "@type" was added to the context
	 * for the predicate.
	 *
	 * @var bool[]
	 */
	protected $defaulted = [];

	/**
	 * The JSON-LD "@graph", which lists all the nodes described by this JSON-LD object.
	 * We apply an optimization eliminating the "@graph" entry if it consists
	 * of a single node; in that case we will set $this->graph to null in
	 * #finishJson() to ensure that the deferred callback in #finishDocument()
	 * doesn't later emit "@graph".
	 *
	 * @see https://www.w3.org/TR/json-ld/#named-graphs
	 *
	 * @var array[]|null
	 */
	private $graph = [];

	/**
	 * A collection of predicates about a specific subject. The
	 * subject is identified by the "@id" key in this array; the other
	 * keys identify JSON-LD properties.
	 *
	 * @see https://www.w3.org/TR/json-ld/#dfn-edge
	 *
	 * @var array
	 */
	private $predicates = [];

	/**
	 * A sequence of zero or more IRIs, nodes, or values, which are the
	 * destination targets of the current predicates.
	 *
	 * @see https://www.w3.org/TR/json-ld/#dfn-list
	 *
	 * @var array
	 */
	private $values = [];

	/**
	 * True iff we have written the opening of the "@graph" field.
	 *
	 * @var bool
	 */
	private $wroteGraph = false;

	/**
	 * JSON-LD objects describing a single node can omit the "@graph" field;
	 * this variable remains false only so long as we can guarantee that
	 * only a single node has been described.
	 *
	 * @var bool
	 */
	private $disableGraphOpt = false;

	/**
	 * The IRI for the RDF `type` property.
	 */
	const RDF_TYPE_IRI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';

	/**
	 * The type internally used for "default type", which is a string or
	 * otherwise default-coerced type.
	 */
	const DEFAULT_TYPE = '@purtle@default@';

	/**
	 * @param string $role
	 * @param BNodeLabeler|null $labeler
	 */
	public function __construct( $role = parent::DOCUMENT_ROLE, BNodeLabeler $labeler = null ) {
		parent::__construct( $role, $labeler );

		// The following named methods are protected, not private, so we
		// can invoke them directly w/o function wrappers.
		$this->transitionTable[self::STATE_START][self::STATE_DOCUMENT] =
			[ $this, 'beginJson' ];
		$this->transitionTable[self::STATE_DOCUMENT][self::STATE_FINISH] =
			[ $this, 'finishJson' ];
		$this->transitionTable[self::STATE_OBJECT][self::STATE_PREDICATE] =
			[ $this, 'finishPredicate' ];
		$this->transitionTable[self::STATE_OBJECT][self::STATE_SUBJECT] =
			[ $this, 'finishSubject' ];
		$this->transitionTable[self::STATE_OBJECT][self::STATE_DOCUMENT] =
			[ $this, 'finishDocument' ];
	}

	/**
	 * Emit $val as JSON, with $indent extra indentations on each line.
	 *
	 * The outermost pair of braces/brackets is stripped, so the result is a
	 * fragment meant to be spliced into an enclosing object or list.
	 *
	 * @param array $val
	 * @param int $indent
	 * @return string the JSON string for $val
	 * @throws \RuntimeException if $val cannot be represented as JSON
	 */
	public function encode( $val, $indent ) {
		$str = json_encode( $val, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES );
		if ( $str === false ) {
			// json_encode() reports failure (for instance on malformed UTF-8) by
			// returning false. Passing that on would silently produce an empty
			// fragment and therefore a corrupt document, so fail loudly instead.
			throw new \RuntimeException(
				'Unable to encode value as JSON: ' . json_last_error_msg()
			);
		}

		// Strip outermost open/close braces/brackets
		$str = preg_replace( '/^[[{]\n?|\n?[}\]]$/', '', $str );

		if ( $indent > 0 ) {
			// add extra indentation
			$str = preg_replace( '/^/m', str_repeat( ' ', $indent ), $str );
		}

		return $str;
	}

	/**
	 * Return a "compact IRI" corresponding to the given base/local pair.
	 * This adds entries to the "@context" key when needed to allow use
	 * of a given prefix.
	 * @see https://www.w3.org/TR/json-ld/#dfn-compact-iri
	 *
	 * @param string $base A QName prefix if $local is given, or an IRI if $local is null.
	 * @param string|null $local A QName suffix, or null if $base is an IRI.
	 *
	 * @return string A compact IRI.
	 */
	private function compactify( $base, $local = null ) {
		$this->expandShorthand( $base, $local );

		if ( $local === null ) {
			// Already a full IRI; nothing to compact.
			return $base;
		}

		if ( $base !== '_' && isset( $this->prefixes[ $base ] ) ) {
			if ( $base === '' ) {
				// Empty prefix not supported; use full IRI
				return $this->prefixes[ $base ] . $local;
			}
			if ( !isset( $this->context[ $base ] ) ) {
				$this->context[ $base ] = $this->prefixes[ $base ];
			}
			if ( $this->context[ $base ] !== $this->prefixes[ $base ] ) {
				// Context name conflict; use full IRI
				return $this->prefixes[ $base ] . $local;
			}
		}

		return $base . ':' . $local;
	}

	/**
	 * Return an absolute IRI from the given base/local pair.
	 * @see https://www.w3.org/TR/json-ld/#dfn-absolute-iri
	 *
	 * @param string $base A QName prefix if $local is given, or an IRI if $local is null.
	 * @param string|null $local A QName suffix, or null if $base is an IRI.
	 *
	 * @return string An absolute IRI.
	 * @throws LogicException if $local is still set after QName expansion,
	 *  which is reported as an unknown prefix.
	 */
	private function toIRI( $base, $local ) {
		$this->expandShorthand( $base, $local );
		$this->expandQName( $base, $local );
		if ( $local !== null ) {
			throw new LogicException( 'Unknown prefix: ' . $base );
		}
		return $base;
	}

	/**
	 * Return an appropriate term for the current predicate value.
	 *
	 * rdf:type is returned as its full IRI so callers can special-case it.
	 * Otherwise the bare local name is preferred (and registered in the
	 * context) when it is not also a prefix name and is not already bound to
	 * a different predicate; failing that, the compact IRI is used.
	 *
	 * @return string
	 */
	private function getCurrentTerm() {
		list( $base, $local ) = $this->currentPredicate;
		$predIRI = $this->toIRI( $base, $local );
		if ( $predIRI === self::RDF_TYPE_IRI ) {
			return $predIRI;
		}
		$this->expandShorthand( $base, $local );
		if ( $local === null ) {
			return $base;
		} elseif ( $base !== '_' && !isset( $this->prefixes[ $local ] ) ) {
			// Prefixes get priority over field names in @context
			$pred = $this->compactify( $base, $local );
			if ( !isset( $this->context[ $local ] ) ) {
				$this->context[ $local ] = [ '@id' => $pred ];
			}
			if ( $this->context[ $local ][ '@id' ] === $pred ) {
				return $local;
			}
			// The local name is already taken by another predicate.
			return $pred;
		}
		return $this->compactify( $base, $local );
	}

	/**
	 * Write document header.
	 */
	protected function beginJson() {
		if ( $this->role === self::DOCUMENT_ROLE ) {
			$this->write( "{\n" );
			$this->write( function () {
				// If this buffer is drained early, disable @graph optimization
				$this->disableGraphOpt = true;
				return '';
			} );
		}
	}

	/**
	 * Write document footer.
	 *
	 * NOTE(review): when no subject was ever written, the else-branch below
	 * still emits the closing "]" although "@graph" was never opened. Whether
	 * that path is reachable depends on RdfWriterBase - confirm.
	 */
	protected function finishJson() {
		// If we haven't drained yet, and @graph has only 1 element, then we
		// can optimize our output and hoist the single node to top level.
		if ( $this->role === self::DOCUMENT_ROLE ) {
			if ( ( !$this->disableGraphOpt ) && count( $this->graph ) === 1 ) {
				$this->write( $this->encode( $this->graph[0], 0 ) );
				$this->graph = null; // We're done with @graph.
			} else {
				$this->disableGraphOpt = true;
				$this->write( "\n ]" );
			}
		}

		if ( count( $this->context ) ) {
			// Write @context field.
			$this->write( ",\n" );
			$this->write( $this->encode( [
				'@context' => $this->context
			], 0 ) );
		}

		$this->write( "\n}" );
	}

	/**
	 * Close the current subject and queue a deferred callback that emits the
	 * nodes collected so far when it is invoked.
	 */
	protected function finishDocument() {
		$this->finishSubject();
		$this->write( function () {
			// if this is drained before finishJson(), then disable
			// the graph optimization and dump what we've got so far.
			$str = '';
			if ( $this->graph !== null && count( $this->graph ) > 0 ) {
				$this->disableGraphOpt = true;
				if ( $this->role === self::DOCUMENT_ROLE && !$this->wroteGraph ) {
					$str .= " \"@graph\": [\n";
					$this->wroteGraph = true;
				} else {
					$str .= ",\n";
				}
				$str .= $this->encode( $this->graph, 1 );
				$this->graph = [];
				return $str;
			}
			// Delay; maybe we'll be able to optimize this later.
			return $str;
		} );
	}

	/**
	 * Start a new node; its "@id" is the compact form of the subject.
	 *
	 * @param string $base
	 * @param string|null $local
	 */
	protected function writeSubject( $base, $local = null ) {
		$this->predicates = [
			'@id' => $this->compactify( $base, $local )
		];
	}

	/**
	 * Flush the pending predicate and append the finished node to the graph.
	 */
	protected function finishSubject() {
		$this->finishPredicate();
		$this->graph[] = $this->predicates;
	}

	/**
	 * @param string $base
	 * @param string|null $local
	 */
	protected function writePredicate( $base, $local = null ) {
		// no op
	}

	/**
	 * @param string $base
	 * @param string|null $local
	 */
	protected function writeResource( $base, $local = null ) {
		$pred = $this->getCurrentTerm();
		$value = $this->compactify( $base, $local );
		// Values of rdf:type are always kept expanded; #finishPredicate()
		// unwraps their "@id" when it renames the key to "@type".
		$this->addTypedValue( '@id', $value, [
			'@id' => $value
		], ( $pred === self::RDF_TYPE_IRI ) );
	}

	/**
	 * @param string $text
	 * @param string|null $language
	 */
	protected function writeText( $text, $language = null ) {
		if ( !$this->isValidLanguageCode( $language ) ) {
			$this->addTypedValue( self::DEFAULT_TYPE, $text );
		} else {
			$expanded = [
				'@language' => $language,
				'@value' => $text
			];
			$this->addTypedValue( self::DEFAULT_TYPE, $expanded, $expanded );
		}
	}

	/**
	 * Write a typed literal.
	 *
	 * xsd:string, xsd:integer, xsd:boolean and xsd:double literals are written
	 * as native JSON values, but only when that conversion is lossless. Any
	 * other literal, and any literal of those types that does not convert
	 * cleanly, is written with an explicit "@type" and its original text.
	 *
	 * @param string $literal
	 * @param string|null $typeBase
	 * @param string|null $typeLocal
	 */
	public function writeValue( $literal, $typeBase, $typeLocal = null ) {
		if ( $typeBase === null && $typeLocal === null ) {
			$this->addTypedValue( self::DEFAULT_TYPE, $literal );
			return;
		}

		switch ( $this->toIRI( $typeBase, $typeLocal ) ) {
			case 'http://www.w3.org/2001/XMLSchema#string':
				$this->addTypedValue( self::DEFAULT_TYPE, strval( $literal ) );
				return;
			case 'http://www.w3.org/2001/XMLSchema#integer':
				// intval() saturates on overflow and maps garbage to 0, so only
				// take the native route when the literal survives it intact.
				if ( self::isNativeInteger( $literal ) ) {
					$this->addTypedValue( self::DEFAULT_TYPE, intval( strval( $literal ) ) );
					return;
				}
				break;
			case 'http://www.w3.org/2001/XMLSchema#boolean':
				if ( is_bool( $literal ) ) {
					$this->addTypedValue( self::DEFAULT_TYPE, $literal );
					return;
				}
				// Anything other than the two canonical spellings (e.g. "1")
				// must not be collapsed to false; keep it as a typed literal.
				if ( $literal === 'true' || $literal === 'false' ) {
					$this->addTypedValue( self::DEFAULT_TYPE, ( $literal === 'true' ) );
					return;
				}
				break;
			case 'http://www.w3.org/2001/XMLSchema#double':
				// doubleval() would accept a numeric prefix ("1.5abc" => 1.5),
				// so require the whole literal to be numeric first.
				if ( is_numeric( $literal ) ) {
					$v = doubleval( $literal );
					// Only "numbers with fractions" are xsd:double. We need
					// to verify that the JSON string will contain a decimal
					// point, otherwise the value would be interpreted as an
					// xsd:integer.
					// TODO: consider instead using JSON_PRESERVE_ZERO_FRACTION
					// in $this->encode() once our required PHP >= 5.6.6.
					// OTOH, the spec language is ambiguous about whether "5."
					// would be considered an integer or a double.
					if ( strpos( json_encode( $v ), '.' ) !== false ) {
						$this->addTypedValue( self::DEFAULT_TYPE, $v );
						return;
					}
				}
				break;
		}

		$type = $this->compactify( $typeBase, $typeLocal );
		$literal = strval( $literal );
		$this->addTypedValue( $type, $literal, [
			'@type' => $type,
			'@value' => $literal
		] );
	}

	/**
	 * Check whether a literal can be written as a native JSON integer
	 * without changing its value.
	 *
	 * @param mixed $literal
	 * @return bool True if the string form of $literal is an optionally signed
	 *  run of decimal digits whose value fits into a PHP integer.
	 */
	private static function isNativeInteger( $literal ) {
		$str = strval( $literal );
		if ( !preg_match( '/^[-+]?[0-9]+$/', $str ) ) {
			return false;
		}
		// Arithmetic on a numeric string yields a float once the value no
		// longer fits into an int, which is how overflow is detected here.
		return is_int( $str + 0 );
	}

	/**
	 * Add a typed value for the given predicate. If possible, adds a
	 * default type to the context to avoid having to repeat type information
	 * in each value for this predicate. If there is already a default
	 * type which conflicts with this one, or if $forceExpand is true,
	 * then use the "expanded" value which will explicitly override any
	 * default type.
	 *
	 * @param string $type The compactified JSON-LD @type for this value, or
	 *  self::DEFAULT_TYPE to indicate the default JSON-LD type coercion rules
	 *  should be used.
	 * @param string|int|float|bool $simpleVal The "simple" representation
	 *  for this value, used if the type can be hoisted into the context.
	 * @param array|null $expandedVal The "expanded" representation for this
	 *  value, used if the context @type conflicts with this value; or null
	 *  to use "@value" for the expanded representation.
	 * @param bool $forceExpand If true, don't try to add this type to the
	 *  context. Defaults to false.
	 */
	protected function addTypedValue( $type, $simpleVal, $expandedVal = null, $forceExpand = false ) {
		if ( !$forceExpand ) {
			$pred = $this->getCurrentTerm();
			if ( $type === self::DEFAULT_TYPE ) {
				if ( !isset( $this->context[ $pred ][ '@type' ] ) ) {
					// No explicit type registered yet: pin this predicate to
					// default typing so none is added later.
					$this->defaulted[ $pred ] = true;
				}
				if ( isset( $this->defaulted[ $pred ] ) ) {
					$this->values[] = $simpleVal;
					return;
				}
			} elseif ( !isset( $this->defaulted[ $pred ] ) ) {
				if ( !isset( $this->context[ $pred ] ) ) {
					$this->context[ $pred ] = [];
				}
				if ( !isset( $this->context[ $pred ][ '@type' ] ) ) {
					$this->context[ $pred ][ '@type' ] = $type;
				}
				if ( $this->context[ $pred ][ '@type' ] === $type ) {
					$this->values[] = $simpleVal;
					return;
				}
			}
		}
		if ( $expandedVal === null ) {
			$this->values[] = [ '@value' => $simpleVal ];
		} else {
			$this->values[] = $expandedVal;
		}
	}

	/**
	 * Move the values collected for the current predicate into the current
	 * node, merging with any values already stored under the same key.
	 *
	 * @throws LogicException if no value was collected for the predicate
	 */
	protected function finishPredicate() {
		$name = $this->getCurrentTerm();

		if ( $name === self::RDF_TYPE_IRI ) {
			// rdf:type is expressed with the "@type" keyword, whose values
			// are plain IRIs rather than [ "@id" => ... ] nodes.
			$name = '@type';
			$this->values = array_map( function ( array $val ) {
				return $val[ '@id' ];
			}, $this->values );
		}
		if ( isset( $this->predicates[$name] ) ) {
			$was = $this->predicates[$name];
			// Wrap $was into a numeric indexed array if it isn't already.
			// Note that $was could have non-numeric indices, eg
			// [ "@id" => "foo" ], in which case it still needs to be wrapped.
			if ( !( is_array( $was ) && isset( $was[0] ) ) ) {
				$was = [ $was ];
			}
			$this->values = array_merge( $was, $this->values );
		}

		$cnt = count( $this->values );
		if ( $cnt === 0 ) {
			throw new LogicException( 'finishPredicate can\'t be called without at least one value' );
		} elseif ( $cnt === 1 ) {
			// A single value is stored bare rather than as a one-element list.
			$this->predicates[$name] = $this->values[0];
		} else {
			$this->predicates[$name] = $this->values;
		}

		$this->values = [];
	}

	/**
	 * @param string $role
	 * @param BNodeLabeler $labeler
	 *
	 * @return RdfWriterBase
	 */
	protected function newSubWriter( $role, BNodeLabeler $labeler ) {
		$writer = new self( $role, $labeler );

		// Have subwriter share context with this parent.
		$writer->context = &$this->context;
		$writer->defaulted = &$this->defaulted;

		// We can't use the @graph optimization.
		$this->disableGraphOpt = true;

		return $writer;
	}

	/**
	 * @return string a MIME type
	 */
	public function getMimeType() {
		return 'application/ld+json; charset=UTF-8';
	}

}