1# -*- coding: utf-8 -*- 2 3""" 4.. 5 .. seealso:: `SPARQL Specification <http://www.w3.org/TR/rdf-sparql-query/>`_ 6 7 Developers involved: 8 9 * Ivan Herman <http://www.ivan-herman.net> 10 * Sergio Fernández <http://www.wikier.org> 11 * Carlos Tejo Alonso <http://www.dayures.net> 12 * Alexey Zakhlestin <https://indeyets.ru/> 13 14 Organizations involved: 15 16 * `World Wide Web Consortium <http://www.w3.org>`_ 17 * `Salzburg Research <http://www.salzburgresearch.at>`_ 18 * `Foundation CTIC <http://www.fundacionctic.org/>`_ 19 20 :license: `W3C® Software notice and license <http://www.w3.org/Consortium/Legal/copyright-software>`_ 21 22 :requires: `RDFLib <https://rdflib.readthedocs.io>`_ package. 23""" 24 25import urllib 26import urllib2 27from urllib2 import urlopen as urlopener # don't change the name: tests override it 28import base64 29import re 30import sys 31import warnings 32 33import json 34from KeyCaseInsensitiveDict import KeyCaseInsensitiveDict 35from SPARQLExceptions import QueryBadFormed, EndPointNotFound, EndPointInternalError, Unauthorized, URITooLong 36from SPARQLWrapper import __agent__ 37 38# From <https://www.w3.org/TR/sparql11-protocol/#query-success> 39# The response body of a successful query operation with a 2XX response is either: 40# * SELECT and ASK: a SPARQL Results Document in XML, JSON, or CSV/TSV format. 41# * DESCRIBE and CONSTRUCT: an RDF graph serialized, for example, in the RDF/XML syntax, or an equivalent RDF graph serialization. 42# 43# Possible parameter keys and values... 44# Examples: 45# - ClioPatria: the SWI-Prolog Semantic Web Server <http://cliopatria.swi-prolog.org/home> 46# * Parameter key: "format" <http://cliopatria.swi-prolog.org/help/http> 47# * Parameter value must have one of these values: "rdf+xml", "json", "csv", "application/sparql-results+xml" or "application/sparql-results+json". 
48# 49################################################################################ 50# 51# - OpenLink Virtuoso <http://virtuoso.openlinksw.com> 52# * Parameter key: "format" or "output" 53# * Parameter value, like directly: 54# "text/html" (HTML), "text/x-html+tr" (HTML (Faceted Browsing Links)), "application/vnd.ms-excel" 55# "application/sparql-results+xml" (XML), "application/sparql-results+json", (JSON) 56# "application/javascript" (Javascript), "text/turtle" (Turtle), "application/rdf+xml" (RDF/XML) 57# "text/plain" (N-Triples), "text/csv" (CSV), "text/tab-separated-values" (TSV) 58# * Parameter value, like indirectly: 59# "HTML" (alias text/html), "JSON" (alias application/sparql-results+json), "XML" (alias application/sparql-results+xml), "TURTLE" (alias text/rdf+n3), JavaScript (alias application/javascript) 60# See <http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/VOSSparqlProtocol#Additional HTTP Response Formats -- SELECT> 61# 62# For a SELECT query type, the default return mimetype (if Accept: */* is sent) is application/sparql-results+xml 63# For a ASK query type, the default return mimetype (if Accept: */* is sent) is text/html 64# For a CONSTRUCT query type, the default return mimetype (if Accept: */* is sent) is text/turtle 65# For a DESCRIBE query type, the default return mimetype (if Accept: */* is sent) is text/turtle 66# 67################################################################################ 68# 69# - Fuseki (formerly there was Joseki) <https://jena.apache.org/documentation/serving_data/> 70# * Uses: Parameters AND Content Negotiation 71# * Parameter key: "format" or "output" 72# * JSON-LD (application/ld+json): supported (in CONSTRUCT and DESCRIBE) 73# 74# * Parameter key: "format" or "output" 75# See Fuseki 1: https://github.com/apache/jena/blob/master/jena-fuseki1/src/main/java/org/apache/jena/fuseki/HttpNames.java 76# See Fuseki 2: 
https://github.com/apache/jena/blob/master/jena-arq/src/main/java/org/apache/jena/riot/web/HttpNames.java 77# * Fuseki 1 - Short names for "output=" : "json", "xml", "sparql", "text", "csv", "tsv", "thrift" 78# See <https://github.com/apache/jena/blob/master/jena-fuseki1/src/main/java/org/apache/jena/fuseki/servlets/ResponseResultSet.java> 79# * Fuseki 2 - Short names for "output=" : "json", "xml", "sparql", "text", "csv", "tsv", "thrift" 80# See <https://github.com/apache/jena/blob/master/jena-fuseki2/jena-fuseki-core/src/main/java/org/apache/jena/fuseki/servlets/ResponseResultSet.java> 81# If a non-expected short name is used, the server returns an "Error 400: Can't determine output serialization" 82# Valid alias for SELECT and ASK: "json", "xml", "csv", "tsv" 83# Valid alias for DESCRIBE and CONSTRUCT: "json" (alias for json-ld ONLY in Fuseki2), "xml" 84# Valid mimetype for DESCRIBE and CONSTRUCT: "application/ld+json" 85# Default return mimetypes: For a SELECT and ASK query types, the default return mimetype (if Accept: */* is sent) is application/sparql-results+json 86# Default return mimetypes: For a DESCRIBE and CONSTRUCT query types, the default return mimetype (if Accept: */* is sent) is text/turtle 87# In case of a malformed query, Fuseki1 returns 200 instead of 400. 88# 89################################################################################ 90# 91# - Eclipse RDF4J <http://rdf4j.org/> 92# * Formerly known as OpenRDF Sesame 93# * Uses: ONLY Content Negotiation 94# * See <https://rdf4j.eclipse.org/documentation/rest-api/#the-query-operation> 95# * See <https://rdf4j.eclipse.org/documentation/rest-api/#content-types> 96# * Parameter: If an unexpected parameter is used, the server ignores it. 
97# 98# ** SELECT 99# *** application/sparql-results+xml (DEFAULT if Accept: */* is sent) 100# *** application/sparql-results+json (also application/json) 101# *** text/csv 102# *** text/tab-separated-values 103# *** Other values: application/x-binary-rdf-results-table 104# 105# ** ASK 106# *** application/sparql-results+xml (DEFAULT if Accept: */* is sent) 107# *** application/sparql-results+json 108# *** Other values: text/boolean 109# *** Not supported: text/csv 110# *** Not supported: text/tab-separated-values 111# 112# ** CONSTRUCT 113# *** application/rdf+xml 114# *** application/n-triples (DEFAULT if Accept: */* is sent) 115# *** text/turtle 116# *** text/n3 117# *** application/ld+json 118# *** Other acceptable values: application/n-quads, application/rdf+json, application/trig, application/trix, application/x-binary-rdf 119# *** text/plain (returns application/n-triples) 120# *** text/rdf+n3 (returns text/n3) 121# *** text/x-nquads (returns application/n-quads) 122# 123# ** DESCRIBE 124# *** application/rdf+xml 125# *** application/n-triples (DEFAULT if Accept: */* is sent) 126# *** text/turtle 127# *** text/n3 128# *** application/ld+json 129# *** Other acceptable values: application/n-quads, application/rdf+json, application/trig, application/trix, application/x-binary-rdf 130# *** text/plain (returns application/n-triples) 131# *** text/rdf+n3 (returns text/n3) 132# *** text/x-nquads (returns application/n-quads) 133# 134# Default return mimetypes: For a SELECT and ASK query types, the default return mimetype (if Accept: */* is sent) is application/sparql-results+xml 135# Default return mimetypes: For a DESCRIBE and CONSTRUCT query types, the default return mimetype (if Accept: */* is sent) is application/n-triples 136# 137# 138################################################################################ 139# 140# - RASQAL <http://librdf.org/rasqal/> 141# * Parameter key: "results" 142# * Uses roqet as RDF query utility 143# For variable bindings, 
the values of FORMAT vary upon what Rasqal supports but include simple 144# for a simple text format (default), xml for the SPARQL Query Results XML format, csv for SPARQL CSV, 145# tsv for SPARQL TSV, rdfxml and turtle for RDF syntax formats, and json for a JSON version of the results. 146# 147# For RDF graph results, the values of FORMAT are ntriples (N-Triples, default), 148# rdfxml-abbrev (RDF/XML Abbreviated), rdfxml (RDF/XML), turtle (Turtle), 149# json (RDF/JSON resource centric), json-triples (RDF/JSON triples) or 150# rss-1.0 (RSS 1.0, also an RDF/XML syntax). 151# 152# See <http://librdf.org/rasqal/roqet.html> 153# 154################################################################################ 155# 156# - Marklogic <http://marklogic.com> 157# * Uses content negotiation (no URL parameters). 158# * You can use following methods to query triples <https://docs.marklogic.com/guide/semantics/semantic-searches#chapter>: 159# - SPARQL mode in Query Console. For details, see Querying Triples with SPARQL 160# - XQuery using the semantics functions, and Search API, or a combination of XQuery and SPARQL. For details, see Querying Triples with XQuery or JavaScript. 161# - HTTP via a SPARQL endpoint. For details, see Using Semantics with the REST Client API. 162# * Formats are specified as part of the HTTP Accept headers of the REST request. <https://docs.marklogic.com/guide/semantics/REST#id_92428> 163# - When you query the SPARQL endpoint with REST Client APIs, you can specify the result output format. <https://docs.marklogic.com/guide/semantics/REST#id_54258> 164# The response type format depends on the type of query and the MIME type in the HTTP Accept header. 165# - This table describes the MIME types and Accept Header/Output formats (MIME type) for different types of SPARQL queries. 
See <https://docs.marklogic.com/guide/semantics/REST#id_54258> and <https://docs.marklogic.com/guide/semantics/loading#id_70682> 166# SELECT "application/sparql-results+xml", "application/sparql-results+json", "text/html", "text/csv" 167# CONSTRUCT or DESCRIBE "application/n-triples", "application/rdf+json", "application/rdf+xml", "text/turtle", "text/n3", "application/n-quads", "application/trig" 168# ASK queries return a boolean (true or false). 169# 170################################################################################ 171# 172# - AllegroGraph <https://franz.com/agraph/allegrograph/> 173# * Uses only content negotiation (no URL parameters). 174# * The server always looks at the Accept header of a request, and tries to 175# generate a response in the format that the client asks for. If this fails, 176# a 406 response is returned. When no Accept, or an Accept of */* is specified, 177# the server prefers text/plain, in order to make it easy to explore the interface from a web browser. 
178# * Accept header expected (values returned by server when a wrong header is sent): 179# ** SELECT 180# *** application/sparql-results+xml (DEFAULT if Accept: */* is sent) 181# *** application/sparql-results+json (and application/json) 182# *** text/csv 183# *** text/tab-separated-values 184# *** OTHERS: application/sparql-results+ttl, text/integer, application/x-lisp-structured-expression, text/table, application/processed-csv, text/simple-csv, application/x-direct-upis 185# 186# ** ASK 187# *** application/sparql-results+xml (DEFAULT if Accept: */* is sent) 188# *** application/sparql-results+json (and application/json) 189# *** Not supported: text/csv 190# *** Not supported: text/tab-separated-values 191# 192# ** CONSTRUCT 193# *** application/rdf+xml (DEFAULT if Accept: */* is sent) 194# *** text/rdf+n3 195# *** OTHERS: text/integer, application/json, text/plain, text/x-nquads, application/trix, text/table, application/x-direct-upis 196# 197# ** DESCRIBE 198# *** application/rdf+xml (DEFAULT if Accept: */* is sent) 199# *** text/rdf+n3 200# 201# See <https://franz.com/agraph/support/documentation/current/http-protocol.html> 202# 203################################################################################ 204# 205# - 4store. Code repository <https://github.com/4store/4store> documentation <https://4store.danielknoell.de/trac/wiki/SparqlServer/> 206# * Uses: Parameters AND Content Negotiation 207# * Parameter key: "output" 208# * Parameter value: alias. If an unexpected alias is used, the server is not working properly 209# * JSON-LD: NOT supported 210# 211# ** SELECT 212# *** application/sparql-results+xml (alias xml) (DEFAULT if Accept: */* is sent)) 213# *** application/sparql-results+json or application/json (alias json) 214# *** text/csv (alias csv) 215# *** text/tab-separated-values (alias tsv). Returns "text/plain" in GET. 
216# *** Other values: text/plain, application/n-triples 217# 218# ** ASK 219# *** application/sparql-results+xml (alias xml) (DEFAULT if Accept: */* is sent) 220# *** application/sparql-results+json or application/json (alias json) 221# *** text/csv (alias csv) 222# *** text/tab-separated-values (alias tsv). Returns "text/plain" in GET. 223# *** Other values: text/plain, application/n-triples 224# 225# ** CONSTRUCT 226# *** application/rdf+xml (alias xml) (DEFAULT if Accept: */* is sent) 227# *** text/turtle (alias "text") 228# 229# ** DESCRIBE 230# *** application/rdf+xml (alias xml) (DEFAULT if Accept: */* is sent) 231# *** text/turtle (alias "text") 232# 233# Valid alias for SELECT and ASK: "json", "xml", "csv", "tsv" (also "text" and "ascii") 234# Valid alias for DESCRIBE and CONSTRUCT: "xml", "text" (for turtle) 235# Default return mimetypes: For a SELECT and ASK query types, the default return mimetype (if Accept: */* is sent) is application/sparql-results+xml 236# Default return mimetypes: For a DESCRIBE and CONSTRUCT query types, the default return mimetype (if Accept: */* is sent) is application/rdf+xml 237# 238# 239################################################################################ 240# 241# - Blazegraph <https://www.blazegraph.com/> & NanoSparqlServer <https://wiki.blazegraph.com/wiki/index.php/NanoSparqlServer> <https://wiki.blazegraph.com/wiki/index.php/REST_API#SPARQL_End_Point> 242# * Formerly known as Bigdata 243# * Uses: Parameters AND Content Negotiation 244# * Parameter key: "format" (available since version 1.4.0). Setting this parameter will override any Accept Header that is present. <https://wiki.blazegraph.com/wiki/index.php/REST_API#GET_or_POST> 245# * Parameter value: alias. 
If an unexpected alias is used, the server does not work properly 246# 247# ** SELECT 248# *** application/sparql-results+xml (alias xml) (DEFAULT if Accept: */* is sent) 249# *** application/sparql-results+json or application/json (alias json) 250# *** text/csv 251# *** text/tab-separated-values 252# *** Other values: application/x-binary-rdf-results-table 253# 254# ** ASK 255# *** application/sparql-results+xml (alias xml) (DEFAULT if Accept: */* is sent) 256# *** application/sparql-results+json or application/json (alias json) 257# 258# ** CONSTRUCT 259# *** application/rdf+xml (alias xml) (DEFAULT if Accept: */* is sent) 260# *** text/turtle (returns text/n3) 261# *** text/n3 262# 263# ** DESCRIBE 264# *** application/rdf+xml (alias xml) (DEFAULT if Accept: */* is sent) 265# *** text/turtle (returns text/n3) 266# *** text/n3 267# 268# Valid alias for SELECT and ASK: "xml", "json" 269# Valid alias for DESCRIBE and CONSTRUCT: "xml", "json" (but it returns unexpected "application/sparql-results+json") 270# Default return mimetypes: For a SELECT and ASK query types, the default return mimetype (if Accept: */* is sent) is application/sparql-results+xml 271# Default return mimetypes: For a DESCRIBE and CONSTRUCT query types, the default return mimetype (if Accept: */* is sent) is application/rdf+xml 272# 273################################################################################ 274# 275# - GraphDB <http://graphdb.ontotext.com/> <http://graphdb.ontotext.com/documentation/free/> 276# * Formerly known as OWLIM (OWLIM-Lite, OWLIM-SE) 277# * Uses: Only Content Negotiation. 278# * If the Accept value is not within the expected ones, the server returns a 406 "No acceptable file format found." 
279# 280# ** SELECT 281# *** DEFAULT (if Accept: */* is sent): text/csv 282# *** application/sparql-results+xml, application/xml (.srx file) 283# *** application/sparql-results+json, application/json (.srj file) 284# *** text/csv (DEFAULT if Accept: */* is sent) 285# *** text/tab-separated-values 286# 287# ** ASK 288# *** DEFAULT (if Accept: */* is sent): application/sparql-results+json 289# *** application/sparql-results+xml, application/xml (.srx file) 290# *** application/sparql-results+json (DEFAULT if Accept: */* is sent), application/json (.srj file) 291# *** NOT supported: text/csv, text/tab-separated-values 292# 293# ** CONSTRUCT 294# *** DEFAULT (if Accept: */* is sent): application/n-triples 295# *** application/rdf+xml, application/xml (.rdf file) 296# *** text/turtle (.ttl file) 297# *** application/n-triples (.nt file) (DEFAULT if Accept: */* is sent) 298# *** text/n3, text/rdf+n3 (.n3 file) 299# *** application/ld+json (.jsonld file) 300# 301# ** DESCRIBE 302# *** DEFAULT (if Accept: */* is sent): application/n-triples 303# *** application/rdf+xml, application/xml (.rdf file) 304# *** text/turtle (.ttl file) 305# *** application/n-triples (.nt file) (DEFAULT if Accept: */* is sent) 306# *** text/n3, text/rdf+n3 (.n3 file) 307# *** application/ld+json (.jsonld file) 308# 309################################################################################ 310# 311# - Stardog <https://www.stardog.com> <https://www.stardog.com/docs/#_http_headers_content_type_accept> (the doc looks outdated) 312# * Uses: ONLY Content Negotiation 313# * Parameter: If an unexpected parameter is used, the server ignores it. 
#
#    ** SELECT
#    *** application/sparql-results+xml (DEFAULT if Accept: */* is sent)
#    *** application/sparql-results+json
#    *** text/csv
#    *** text/tab-separated-values
#    *** Other values: application/x-binary-rdf-results-table
#
#    ** ASK
#    *** application/sparql-results+xml (DEFAULT if Accept: */* is sent)
#    *** application/sparql-results+json
#    *** Other values: text/boolean
#    *** Not supported: text/csv
#    *** Not supported: text/tab-separated-values
#
#    ** CONSTRUCT
#    *** application/rdf+xml
#    *** text/turtle (DEFAULT if Accept: */* is sent)
#    *** text/n3
#    *** application/ld+json
#    *** Other acceptable values: application/n-triples, application/x-turtle, application/trig, application/trix, application/n-quads
#
#    ** DESCRIBE
#    *** application/rdf+xml
#    *** text/turtle (DEFAULT if Accept: */* is sent)
#    *** text/n3
#    *** application/ld+json
#    *** Other acceptable values: application/n-triples, application/x-turtle, application/trig, application/trix, application/n-quads
#
#    Default return mimetypes: For a SELECT and ASK query types, the default return mimetype (if Accept: */* is sent) is application/sparql-results+xml
#    Default return mimetypes: For a DESCRIBE and CONSTRUCT query types, the default return mimetype (if Accept: */* is sent) is text/turtle
#
################################################################################

# Return format aliases. The bare strings after each assignment are attribute
# docstrings (picked up by the documentation generator); keep them directly
# below the constant they describe.

XML = "xml"
"""to be used to set the return format to ``XML`` (``SPARQL Query Results XML`` format or ``RDF/XML``, depending on the query type). **This is the default**."""
JSON = "json"
"""to be used to set the return format to ``JSON``."""
JSONLD = "json-ld"
"""to be used to set the return format to ``JSON-LD``."""
TURTLE = "turtle"
"""to be used to set the return format to ``Turtle``."""
N3 = "n3"
"""to be used to set the return format to ``N3`` (for most of the SPARQL services this is equivalent to Turtle)."""
RDF = "rdf"
"""to be used to set the return ``RDF Graph``."""
RDFXML = "rdf+xml"
"""to be used to set the return format to ``RDF/XML`` explicitly."""
CSV = "csv"
"""to be used to set the return format to ``CSV``"""
TSV = "tsv"
"""to be used to set the return format to ``TSV``"""
# JSONLD is deliberately absent here: it is appended further below only when
# the optional ``rdflib_jsonld`` package can be imported.
_allowedFormats = [JSON, XML, TURTLE, N3, RDF, RDFXML, CSV, TSV]

# Possible HTTP methods
GET = "GET"
"""to be used to set HTTP method ``GET``. **This is the default**."""
POST = "POST"
"""to be used to set HTTP method ``POST``."""
_allowedRequests = [POST, GET]

# Possible HTTP Authentication methods
BASIC = "BASIC"
"""to be used to set ``BASIC`` HTTP Authentication method."""
DIGEST = "DIGEST"
"""to be used to set ``DIGEST`` HTTP Authentication method."""
_allowedAuth = [BASIC, DIGEST]

# Possible SPARQL/SPARUL query type (aka SPARQL Query forms)
SELECT = "SELECT"
"""to be used to set the query type to ``SELECT``. This is, usually, determined automatically."""
CONSTRUCT = "CONSTRUCT"
"""to be used to set the query type to ``CONSTRUCT``. This is, usually, determined automatically."""
ASK = "ASK"
"""to be used to set the query type to ``ASK``. This is, usually, determined automatically."""
DESCRIBE = "DESCRIBE"
"""to be used to set the query type to ``DESCRIBE``. This is, usually, determined automatically."""
INSERT = "INSERT"
"""to be used to set the query type to ``INSERT``. This is, usually, determined automatically."""
DELETE = "DELETE"
"""to be used to set the query type to ``DELETE``. This is, usually, determined automatically."""
CREATE = "CREATE"
"""to be used to set the query type to ``CREATE``. This is, usually, determined automatically."""
CLEAR = "CLEAR"
"""to be used to set the query type to ``CLEAR``. This is, usually, determined automatically."""
DROP = "DROP"
"""to be used to set the query type to ``DROP``. This is, usually, determined automatically."""
LOAD = "LOAD"
"""to be used to set the query type to ``LOAD``. This is, usually, determined automatically."""
COPY = "COPY"
"""to be used to set the query type to ``COPY``. This is, usually, determined automatically."""
MOVE = "MOVE"
"""to be used to set the query type to ``MOVE``. This is, usually, determined automatically."""
ADD = "ADD"
"""to be used to set the query type to ``ADD``. This is, usually, determined automatically."""
_allowedQueryTypes = [SELECT, CONSTRUCT, ASK, DESCRIBE, INSERT, DELETE, CREATE, CLEAR, DROP,
                      LOAD, COPY, MOVE, ADD]

# Possible methods to perform requests
URLENCODED = "urlencoded"
"""to be used to set **URL encode** as the encoding method for the request. This is, usually, determined automatically."""
POSTDIRECTLY = "postdirectly"
"""to be used to set **POST directly** as the encoding method for the request. This is, usually, determined automatically."""
_REQUEST_METHODS = [URLENCODED, POSTDIRECTLY]

# Possible output formats (mime types) that can be converted by the local script. Unfortunately,
# it does not work by simply setting the return format, because there is still a certain level of confusion
# among implementations.
# For example, Joseki returns application/javascript and not the sparql-results+json thing that is required...
# I.e., alternatives should be given...
# Andy Seaborne told me (June 2007) that the right return format is now added to his CVS, i.e., future releases of
# Joseki will be o.k., too. The situation with turtle and n3 is even more confusing because the text/n3 and text/turtle
# mime types have just been proposed and are not yet widely used...
#
# Each list below groups the mime types associated with one return format.
_SPARQL_DEFAULT = ["application/sparql-results+xml", "application/rdf+xml", "*/*"]
_SPARQL_XML = ["application/sparql-results+xml"]
_SPARQL_JSON = ["application/sparql-results+json", "application/json", "text/javascript", "application/javascript"]  # VIVO server returns "application/javascript"
_RDF_XML = ["application/rdf+xml"]
_RDF_TURTLE = ["application/turtle", "text/turtle"]
# The N3 list is a superset of the Turtle list.
_RDF_N3 = _RDF_TURTLE + ["text/rdf+n3", "application/n-triples", "application/n3", "text/n3"]
_RDF_JSONLD = ["application/ld+json", "application/x-json+ld"]
_CSV = ["text/csv"]
_TSV = ["text/tab-separated-values"]
_XML = ["application/xml"]
_ALL = ["*/*"]
# Extended with the JSON-LD mime types below when JSON-LD support is available.
_RDF_POSSIBLE = _RDF_XML + _RDF_N3 + _XML

_SPARQL_PARAMS = ["query"]

# JSON-LD is optional: it is enabled only when ``rdflib_jsonld`` is importable.
# The import is done purely to detect the package; on failure the module keeps
# working with JSON-LD left out of the allowed formats.
try:
    import rdflib_jsonld
    _allowedFormats.append(JSONLD)
    _RDF_POSSIBLE = _RDF_POSSIBLE + _RDF_JSONLD
except ImportError:
    #warnings.warn("JSON-LD disabled because no suitable support has been found", RuntimeWarning)
    pass

# This is very ugly. The fact is that the key for the choice of the output format is not defined.
# Virtuoso uses 'format', Joseki uses 'output', Rasqal seems to use "results", etc. Lee Feigenbaum
# told me that Virtuoso also understands 'output' these days, so I removed 'format'. I do not have
# info about the others yet, i.e., for the time being I keep the general mechanism. Hopefully, in a
# future release, I can get rid of that. However, these processors are (hopefully) oblivious to the
# parameters they do not understand. So: just repeat all possibilities in the final URI. UGLY!!!!!!!
458_returnFormatSetting = ["format", "output", "results"] 459 460####################################################################################################### 461 462 463class SPARQLWrapper(object): 464 """ 465 Wrapper around an online access to a SPARQL Web entry point. 466 467 The same class instance can be reused for subsequent queries. The values of the base Graph URI, return formats, etc, 468 are retained from one query to the next (in other words, only the query string changes). The instance can also be 469 reset to its initial values using the :meth:`resetQuery` method. 470 471 :ivar endpoint: SPARQL endpoint's URI. 472 :vartype endpoint: string 473 :ivar updateEndpoint: SPARQL endpoint's URI for SPARQL Update operations (if it's a different one). The **default** value is ``None``. 474 :vartype updateEndpoint: string 475 :ivar agent: The User-Agent for the HTTP request header. The **default** value is an autogenerated string using the SPARQLWrapper version code. 476 :vartype agent: string 477 :ivar _defaultGraph: URI for the default graph. The value can be set either via an explicit call :func:`addParameter("default-graph-uri", uri)<addParameter>` or as part of the query string. The **default** value is ``None``. 478 :vartype _defaultGraph: string 479 :ivar user: The username of the credentials for querying the current endpoint. The value can be set an explicit call :func:`setCredentials`. The **default** value is ``None``. 480 :vartype user: string 481 :ivar passwd: The password of the credentials for querying the current endpoint. The value can be set an explicit call :func:`setCredentials`. The **default** value is ``None``. 482 :vartype passwd: string 483 :ivar http_auth: HTTP Authentication type. The **default** value is :data:`BASIC`. Possible values are :data:`BASIC` or :data:`DIGEST`. It is used only in case the credentials are set. 
484 :vartype http_auth: string 485 :ivar onlyConneg: Option for allowing (or not) **only** HTTP Content Negotiation (so dismiss the use of HTTP parameters). The default value is ``False``. 486 :vartype onlyConneg: boolean 487 :ivar customHttpHeaders: Custom HTTP Headers to be included in the request. It is a dictionary where keys are the header field and values are the header values. **Important**: These headers override previous values (including ``Content-Type``, ``User-Agent``, ``Accept`` and ``Authorization`` if they are present). 488 :vartype customHttpHeaders: dict 489 :ivar timeout: The timeout (in seconds) to use for querying the endpoint. 490 :vartype timeout: int 491 :ivar queryString: The SPARQL query text. 492 :vartype queryString: string 493 :ivar queryType: The type of SPARQL query (aka SPARQL query form), like :data:`CONSTRUCT`, :data:`SELECT`, :data:`ASK`, :data:`DESCRIBE`, :data:`INSERT`, :data:`DELETE`, :data:`CREATE`, :data:`CLEAR`, :data:`DROP`, :data:`LOAD`, :data:`COPY`, :data:`MOVE` or :data:`ADD` (constants in this module). 494 :vartype queryType: string 495 :ivar returnFormat: The return format.\ 496 No local check is done, so the parameter is simply sent to the endpoint. Eg, if the value is set to :data:`JSON` and a construct query is issued, it is up to the endpoint to react or not, this wrapper does not check.\ 497 The possible values are :data:`JSON`, :data:`XML`, :data:`TURTLE`, :data:`N3`, :data:`RDF`, :data:`RDFXML`, :data:`CSV`, :data:`TSV`, :data:`JSONLD` (constants in this module).\ 498 The **default** value is :data:`XML`. 499 :vartype returnFormat: string 500 :ivar requestMethod: The request method for query or update operations. The possibles values are URL-encoded (:data:`URLENCODED`) or POST directly (:data:`POSTDIRECTLY`). 501 :vartype requestMethod: string 502 :ivar method: The invocation method (HTTP verb). The **default** value is :data:`GET`, but it can be set to :data:`POST`. 
503 :vartype method: string 504 :ivar parameters: The parameters of the request (key/value pairs in a dictionary). 505 :vartype parameters: dict 506 :ivar _defaultReturnFormat: The default return format. It is used in case the same class instance is reused for subsequent queries. 507 :vartype _defaultReturnFormat: string 508 509 :cvar prefix_pattern: regular expression used to remove base/prefixes in the process of determining the query type. 510 :vartype prefix_pattern: :class:`re.RegexObject`, a compiled regular expression. See the :mod:`re` module of Python 511 :cvar pattern: regular expression used to determine whether a query (without base/prefixes) is of type :data:`CONSTRUCT`, :data:`SELECT`, :data:`ASK`, :data:`DESCRIBE`, :data:`INSERT`, :data:`DELETE`, :data:`CREATE`, :data:`CLEAR`, :data:`DROP`, :data:`LOAD`, :data:`COPY`, :data:`MOVE` or :data:`ADD`. 512 :vartype pattern: :class:`re.RegexObject`, a compiled regular expression. See the :mod:`re` module of Python 513 :cvar comments_pattern: regular expression used to remove comments from a query. 514 :vartype comments_pattern: :class:`re.RegexObject`, a compiled regular expression. See the :mod:`re` module of Python 515 516 """ 517 prefix_pattern = re.compile(r"((?P<base>(\s*BASE\s*<.*?>)\s*)|(?P<prefixes>(\s*PREFIX\s+.+:\s*<.*?>)\s*))*") 518 # Maybe the future name could be queryType_pattern 519 pattern = re.compile(r"(?P<queryType>(CONSTRUCT|SELECT|ASK|DESCRIBE|INSERT|DELETE|CREATE|CLEAR|DROP|LOAD|COPY|MOVE|ADD))", re.VERBOSE | re.IGNORECASE) 520 comments_pattern = re.compile(r"(^|\n)\s*#.*?\n") 521 522 def __init__(self, endpoint, updateEndpoint=None, returnFormat=XML, defaultGraph=None, agent=__agent__): 523 """ 524 Class encapsulating a full SPARQL call. 525 526 :param endpoint: SPARQL endpoint's URI. 527 :type endpoint: string 528 :param updateEndpoint: SPARQL endpoint's URI for update operations (if it's a different one). The **default** value is ``None``. 
529 :type updateEndpoint: string 530 :param returnFormat: The return format.\ 531 No local check is done, so the parameter is simply sent to the endpoint. Eg, if the value is set to :data:`JSON` and a construct query is issued, it is up to the endpoint to react or not, this wrapper does not check.\ 532 The possible values are :data:`JSON`, :data:`XML`, :data:`TURTLE`, :data:`N3`, :data:`RDF`, :data:`RDFXML`, :data:`CSV`, :data:`TSV`, :data:`JSONLD` (constants in this module).\ 533 The **default** value is :data:`XML`. 534 :param defaultGraph: URI for the default graph. The value can be set either via an explicit call :func:`addParameter("default-graph-uri", uri)<addParameter>` or as part of the query string. The **default** value is ``None``. 535 :type defaultGraph: string 536 :param agent: The User-Agent for the HTTP request header. The **default** value is an autogenerated string using the SPARQLWrapper version number. 537 :type agent: string 538 """ 539 self.endpoint = endpoint 540 self.updateEndpoint = updateEndpoint if updateEndpoint else endpoint 541 self.agent = agent 542 self.user = None 543 self.passwd = None 544 self.http_auth = BASIC 545 self._defaultGraph = defaultGraph 546 self.onlyConneg = False # Only Content Negotiation 547 self.customHttpHeaders = {} 548 549 if returnFormat in _allowedFormats: 550 self._defaultReturnFormat = returnFormat 551 else: 552 self._defaultReturnFormat = XML 553 554 self.resetQuery() 555 556 def resetQuery(self): 557 """Reset the query, ie, return format, method, query, default or named graph settings, etc, 558 are reset to their default values. This includes the default values for parameters, method, timeout or requestMethod. 
def setReturnFormat(self, format):
    """Select the return format for the next query.

    An allowed format is stored in :attr:`returnFormat`. Any other value leaves the
    current setting untouched: :data:`JSONLD` raises an error, everything else only warns.

    :param format: Possible values are :data:`JSON`, :data:`XML`, :data:`TURTLE`, :data:`N3`, :data:`RDF`, :data:`RDFXML`, :data:`CSV`, :data:`TSV`, :data:`JSONLD` (constants in this module). All other cases are ignored.
    :type format: string
    :raises ValueError: If :data:`JSONLD` is tried to set and the current instance does not support ``JSON-LD``.
    """
    if format in _allowedFormats:
        self.returnFormat = format
        return

    # JSON-LD is a known format that is simply unavailable in this instance.
    if format == JSONLD:
        raise ValueError("Current instance does not support JSON-LD; you might want to install the rdflib-jsonld package.")

    supported = ", ".join(_allowedFormats)
    warnings.warn("Ignore format '%s'; current instance supports: %s." % (format, supported), SyntaxWarning)
def setRequestMethod(self, method):
    """Choose how query/update operations are transmitted: URL-encoded
    (:data:`URLENCODED`) or POSTed directly (:data:`POSTDIRECTLY`).

    Further details at `query operation in SPARQL <http://www.w3.org/TR/sparql11-protocol/#query-operation>`_
    and `update operation in SPARQL Update <http://www.w3.org/TR/sparql11-protocol/#update-operation>`_.

    :param method: Possible values are :data:`URLENCODED` (URL-encoded) or :data:`POSTDIRECTLY` (POST directly). Any other value only triggers a warning and the current setting is kept.
    :type method: string
    """
    if method not in _REQUEST_METHODS:
        warnings.warn("invalid update method '%s'" % method, RuntimeWarning)
        return

    self.requestMethod = method
def addParameter(self, name, value):
    """
    Append an extra key/value pair to the request parameters.

    Some SPARQL endpoints allow extra key value pairs.
    E.g., in virtuoso, one would add ``should-sponge=soft`` to the query forcing
    virtuoso to retrieve graphs that are not stored in its local database.
    Names listed in the reserved SPARQL parameters (such as :attr:`query`) are refused.
    Values accumulate: adding the same name again keeps the earlier values.

    :param name: name.
    :type name: string
    :param value: value.
    :type value: string
    :return: Returns ``True`` if the adding has been accomplished, otherwise ``False``.
    :rtype: bool
    """
    if name in _SPARQL_PARAMS:
        return False

    # One list of values per parameter name; create it on first use.
    self.parameters.setdefault(name, []).append(value)
    return True
def clearCustomHttpHeader(self, httpHeaderName):
    """
    Remove a custom HTTP header that was previously set.

    .. versionadded:: 1.8.2

    :param httpHeaderName: HTTP header name.
    :type httpHeaderName: string
    :return: Returns ``True`` if the clearing has been accomplished, otherwise ``False`` (the header was not set).
    :rtype: bool
    """
    # A private sentinel tells "header absent" apart from any stored value.
    absent = object()
    removed = self.customHttpHeaders.pop(httpHeaderName, absent)
    return removed is not absent
def setQuery(self, query):
    """
    Set the SPARQL query text.

    .. note::
      No check is done on the validity of the query
      (syntax or otherwise) by this module, except for testing the query type (SELECT,
      ASK, etc). Syntax and validity checking is done by the SPARQL service itself.

    The text is stored in :attr:`queryString` and its detected query form in :attr:`queryType`.

    :param query: query text. Byte-strings are decoded as UTF-8.
    :type query: string
    :raises TypeError: If the :attr:`query` parameter is not an unicode-string or utf-8 encoded byte-string.
    """
    # Compare the numeric major version: the string sys.version compares
    # lexicographically, so e.g. "10.0" would sort before "3".
    if sys.version_info[0] < 3:
        text_type, bytes_type = unicode, str
    else:
        text_type, bytes_type = str, bytes

    if isinstance(query, bytes_type):
        query = query.decode('utf-8')
    elif not isinstance(query, text_type):
        raise TypeError('setQuery takes either unicode-strings or utf-8 encoded byte-strings')

    self.queryString = query
    self.queryType = self._parseQueryType(query)
def setUseKeepAlive(self):
    """Make :mod:`urllib2` use keep-alive.

    Installs an opener built around ``keepalive.HTTPHandler``, unless the opener
    currently installed in :mod:`urllib2` already contains such a handler.
    When ``keepalive.HTTPHandler`` could not be imported, a warning is issued and
    the method has no other effect.
    """
    try:
        from keepalive import HTTPHandler

        current_opener = urllib2._opener
        already_installed = bool(current_opener) and any(
            isinstance(handler, HTTPHandler) for handler in current_opener.handlers
        )
        if not already_installed:
            urllib2.install_opener(urllib2.build_opener(HTTPHandler()))
    except ImportError:
        warnings.warn("keepalive support not available, so the execution of this method has no effect")
def _cleanComments(self, query):
    """ Internal method that strips single-line comments from a query (issues #32 and #77).

    Every match of :attr:`comments_pattern` is replaced by two newline characters.

    :param query: The query.
    :type query: string
    :return: the query after all occurrence of singleline comments are removed.
    :rtype: string
    """
    without_comments = self.comments_pattern.sub("\n\n", query)
    return without_comments
def _getAcceptHeader(self):
    """ Internal method for getting the HTTP Accept Header.

    The header depends on the query form (:attr:`queryType`) and on the requested
    :attr:`returnFormat`. When the format is not expected for the query form, ``*/*`` is sent
    (with a warning for query operations, silently for update operations).

    .. seealso:: `Hypertext Transfer Protocol -- HTTP/1.1 - Header Field Definitions <https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1>`_

    :return: the value for the ``Accept`` HTTP header.
    :rtype: string
    """
    query_form = self.queryType
    wanted = self.returnFormat

    # For each family of query forms: the ordered (format, mime types) pairs it
    # understands, and whether falling back to "*/*" deserves a warning.
    if query_form in [SELECT, ASK]:
        # CSV and TSV are allowed for SELECT and ASK
        # (https://www.w3.org/TR/2013/REC-sparql11-protocol-20130321/#query-success)
        # but only described for SELECT (https://www.w3.org/TR/sparql11-results-csv-tsv/)
        known = [(XML, _SPARQL_XML), (JSON, _SPARQL_JSON), (CSV, _CSV), (TSV, _TSV)]
        warn_on_fallback = True
    elif query_form in [CONSTRUCT, DESCRIBE]:
        known = [(TURTLE, _RDF_TURTLE), (N3, _RDF_N3), (XML, _RDF_XML), (RDFXML, _RDF_XML)]
        if JSONLD in _allowedFormats:
            known.append((JSONLD, _RDF_JSONLD))
        warn_on_fallback = True
    elif query_form in [INSERT, DELETE, CREATE, CLEAR, DROP, LOAD, COPY, MOVE, ADD]:
        known = [(XML, _SPARQL_XML), (JSON, _SPARQL_JSON)]
        warn_on_fallback = False
    else:
        return "*/*"

    for format_name, mime_types in known:
        if wanted == format_name:
            return ",".join(mime_types)

    acceptHeader = ",".join(_ALL)
    if warn_on_fallback:
        warnings.warn("Sending Accept header '*/*' because unexpected returned format '%s' in a '%s' SPARQL query form" % (self.returnFormat, self.queryType), RuntimeWarning)
    return acceptHeader
def _query(self):
    """Internal method to execute the query. Returns the output of the
    :func:`urllib2.urlopen` method of the :mod:`urllib2` Python library

    :return: tuples with the raw request plus the expected format.
    :raises QueryBadFormed: If the HTTP return code is ``400``.
    :raises Unauthorized: If the HTTP return code is ``401``.
    :raises EndPointNotFound: If the HTTP return code is ``404``.
    :raises URITooLong: If the HTTP return code is ``414``.
    :raises EndPointInternalError: If the HTTP return code is ``500``.
    :raises urllib2.HTTPError: If the HTTP return code is different to ``400``, ``401``, ``404``, ``414``, ``500``.
    """
    request = self._createRequest()

    try:
        # The timeout is only passed along when one has been configured.
        if self.timeout:
            response = urlopener(request, timeout=self.timeout)
        else:
            response = urlopener(request)
        return response, self.returnFormat
    except urllib2.HTTPError as e:
        # HTTP status codes that are translated into dedicated exceptions.
        translated = {
            400: QueryBadFormed,
            401: Unauthorized,
            404: EndPointNotFound,
            414: URITooLong,
            500: EndPointInternalError,
        }.get(e.code)
        if translated is None:
            # A bare ``raise`` re-raises the HTTPError with its original traceback.
            raise
        raise translated(e.read())
def __str__(self):
    """This method returns the string representation of a :class:`SPARQLWrapper` object.

    The first line names the class and the object's identity; it is followed by the
    instance attributes, sorted by name, one ``"name" : repr(value)`` entry per line.

    .. versionadded:: 1.8.3

    :return: A human-readable string of the object.
    :rtype: string
    """
    qualified_name = "%s.%s" % (self.__module__, self.__class__.__name__)
    attributes = self.__dict__
    entries = ['"%s" : %r' % (key, attributes[key]) for key in sorted(attributes)]
    rendered_attributes = "{" + ",\n".join(entries) + "}"
    return "<%s object at 0x%016X>\n%s" % (qualified_name, id(self), rendered_attributes)
def __init__(self, result):
    """
    :param result: HTTP response stemming from a :func:`SPARQLWrapper.query` call, or a tuple with the expected format: (response, format).
    """
    if isinstance(result, tuple):
        self.response = result[0]
        self.requestedFormat = result[1]
    else:
        self.response = result
        # No format information was supplied with a bare response; define the
        # attribute anyway so that it exists on every instance.
        self.requestedFormat = None
def _convertCSV(self):
    """
    Convert a CSV result into a string: the response body is read and handed back unchanged.
    This method can be overwritten in a subclass for a different conversion method.

    :return: converted result.
    :rtype: string
    """
    csv_payload = self.response.read()
    return csv_payload
def convert(self):
    """
    Encode the return value depending on the return format:

    * in the case of :data:`XML`, a DOM top element is returned
    * in the case of :data:`JSON`, a json conversion will return a dictionary
    * in the case of :data:`RDF/XML<RDFXML>`, the value is converted via RDFLib into a ``RDFLib Graph`` instance
    * in the case of :data:`JSON-LD<JSONLD>`, the value is converted via RDFLib into a ``RDFLib Graph`` instance
    * in the case of RDF :data:`Turtle<TURTLE>`/:data:`N3`, a string is returned
    * in the case of :data:`CSV`/:data:`TSV`, a string is returned
    * In all other cases (missing or unrecognized ``Content-Type``) the raw response body is returned.

    The conversion is selected from the ``Content-Type`` returned by the endpoint. A warning is
    issued when that content type does not correspond to the requested format; the check is
    skipped when no requested format is known for this result.

    :return: the converted query result. See the conversion methods for more details.
    """
    def _content_type_in_list(real, expected):
        """ Internal method for checking if the content-type header received matches any of the content types of the expected list.

        :param real: The content-type header received.
        :type real: string
        :param expected: A list of expected content types.
        :type expected: list
        :return: Returns a boolean after checking if the content-type header received matches any of the content types of the expected list.
        :rtype: boolean
        """
        return any(mime in real for mime in expected)

    def _validate_format(format_name, allowed, mime, requested):
        """ Internal method for validating if the requested format is one of the allowed formats.

        :param format_name: The format name (to be used in the warning message).
        :type format_name: string
        :param allowed: A list of allowed content types.
        :type allowed: list
        :param mime: The content-type header received (to be used in the warning message).
        :type mime: string
        :param requested: the requested format, or ``None`` when it is not known.
        :type requested: string
        """
        if requested is None:
            # Nothing to compare the returned content type against.
            return
        if requested not in allowed:
            message = "Format requested was %s, but %s (%s) has been returned by the endpoint"
            warnings.warn(message % (requested.upper(), format_name, mime), RuntimeWarning)

    # The attribute is only assigned when the result was built from a (response, format) tuple.
    requested = getattr(self, "requestedFormat", None)

    # TODO. In order to compare properly, the requested QueryType (SPARQL Query Form) is needed. For instance, the unexpected N3 requested for a SELECT would return XML
    headers = self.info()
    if "content-type" in headers:
        ct = headers["content-type"]  # returned Content-Type value

        # Checked in this order; the first family containing the content type wins.
        conversions = (
            (_SPARQL_XML, "XML", [XML], self._convertXML),
            (_XML, "XML", [XML], self._convertXML),
            (_SPARQL_JSON, "JSON", [JSON], self._convertJSON),
            (_RDF_XML, "RDF/XML", [RDF, XML, RDFXML], self._convertRDF),
            (_RDF_N3, "N3", [N3, TURTLE], self._convertN3),
            (_CSV, "CSV", [CSV], self._convertCSV),
            (_TSV, "TSV", [TSV], self._convertTSV),
            (_RDF_JSONLD, "JSON(-LD)", [JSONLD, JSON], self._convertJSONLD),
        )
        for mime_types, format_name, allowed, converter in conversions:
            if _content_type_in_list(ct, mime_types):
                _validate_format(format_name, allowed, ct, requested)
                return converter()

        warnings.warn("unknown response content type '%s' returning raw response..." % (ct), RuntimeWarning)
    return self.response.read()
Returning raw content-type ('%s')." %(ct), RuntimeWarning) 1395 return ct 1396 return None 1397 1398 def print_results(self, minWidth=None): 1399 """This method prints a representation of a :class:`QueryResult` object that MUST has as response format :data:`JSON`. 1400 1401 :param minWidth: The minimum width, counting as characters. The default value is ``None``. 1402 :type minWidth: string 1403 """ 1404 1405 # Check if the requested format was JSON. If not, exit. 1406 responseFormat = self._get_responseFormat() 1407 if responseFormat != JSON: 1408 message = "Format return was %s, but JSON was expected. No printing." 1409 warnings.warn(message % (responseFormat), RuntimeWarning) 1410 return 1411 1412 results = self._convertJSON() 1413 if minWidth: 1414 width = self.__get_results_width(results, minWidth) 1415 else: 1416 width = self.__get_results_width(results) 1417 index = 0 1418 for var in results["head"]["vars"]: 1419 print ("?" + var).ljust(width[index]), "|", 1420 index += 1 1421 print 1422 print "=" * (sum(width) + 3 * len(width)) 1423 for result in results["results"]["bindings"]: 1424 index = 0 1425 for var in results["head"]["vars"]: 1426 result_value = self.__get_prettyprint_string_sparql_var_result(result[var]) 1427 print result_value.ljust(width[index]), "|", 1428 index += 1 1429 print 1430 1431 def __get_results_width(self, results, minWidth=2): 1432 width = [] 1433 for var in results["head"]["vars"]: 1434 width.append(max(minWidth, len(var)+1)) 1435 for result in results["results"]["bindings"]: 1436 index = 0 1437 for var in results["head"]["vars"]: 1438 result_value = self.__get_prettyprint_string_sparql_var_result(result[var]) 1439 width[index] = max(width[index], len(result_value)) 1440 index += 1 1441 return width 1442 1443 def __get_prettyprint_string_sparql_var_result(self, result): 1444 value = result["value"] 1445 lang = result.get("xml:lang", None) 1446 datatype = result.get("datatype", None) 1447 if lang is not None: 1448 value += "@"+lang 
1449 if datatype is not None: 1450 value += " ["+datatype+"]" 1451 return value 1452 1453 def __str__(self): 1454 """This method returns the string representation of a :class:`QueryResult` object. 1455 1456 :return: A human-readable string of the object. 1457 :rtype: string 1458 .. versionadded:: 1.8.3 1459 """ 1460 fullname = self.__module__ + "." + self.__class__.__name__ 1461 str_requestedFormat = '"requestedFormat" : '+repr(self.requestedFormat) 1462 str_url = self.response.url 1463 str_code = self.response.code 1464 str_headers = self.response.info() 1465 str_response = '"response (a file-like object, as return by the urllib2.urlopen library call)" : {\n\t"url" : "%s",\n\t"code" : "%s",\n\t"headers" : %s}' % (str_url, str_code, str_headers) 1466 return "<%s object at 0x%016X>\n{%s,\n%s}" % (fullname, id(self), str_requestedFormat, str_response) 1467