1# -*- coding: utf-8 -*-
2
3"""
4..
5  .. seealso:: `SPARQL Specification <http://www.w3.org/TR/rdf-sparql-query/>`_
6
7  Developers involved:
8
9  * Ivan Herman <http://www.ivan-herman.net>
10  * Sergio Fernández <http://www.wikier.org>
11  * Carlos Tejo Alonso <http://www.dayures.net>
12  * Alexey Zakhlestin <https://indeyets.ru/>
13
14  Organizations involved:
15
16  * `World Wide Web Consortium <http://www.w3.org>`_
17  * `Salzburg Research <http://www.salzburgresearch.at>`_
18  * `Foundation CTIC <http://www.fundacionctic.org/>`_
19
20  :license: `W3C® Software notice and license <http://www.w3.org/Consortium/Legal/copyright-software>`_
21
22  :requires: `RDFLib <https://rdflib.readthedocs.io>`_ package.
23"""
24
25import urllib
26import urllib2
27from urllib2 import urlopen as urlopener  # don't change the name: tests override it
28import base64
29import re
30import sys
31import warnings
32
33import json
34from KeyCaseInsensitiveDict import KeyCaseInsensitiveDict
35from SPARQLExceptions import QueryBadFormed, EndPointNotFound, EndPointInternalError, Unauthorized, URITooLong
36from SPARQLWrapper import __agent__
37
38#  From <https://www.w3.org/TR/sparql11-protocol/#query-success>
39#  The response body of a successful query operation with a 2XX response is either:
40#  * SELECT and ASK: a SPARQL Results Document in XML, JSON, or CSV/TSV format.
41#  * DESCRIBE and CONSTRUCT: an RDF graph serialized, for example, in the RDF/XML syntax, or an equivalent RDF graph serialization.
42#
43#  Possible parameter keys and values...
44#  Examples:
45#  - ClioPatria: the SWI-Prolog Semantic Web Server <http://cliopatria.swi-prolog.org/home>
46#    * Parameter key: "format" <http://cliopatria.swi-prolog.org/help/http>
47#    * Parameter value must have one of these values: "rdf+xml", "json", "csv", "application/sparql-results+xml" or "application/sparql-results+json".
48#
49################################################################################
50#
51#  - OpenLink Virtuoso  <http://virtuoso.openlinksw.com>
52#    * Parameter key: "format" or "output"
53#    * Parameter value, like directly:
54#      "text/html" (HTML), "text/x-html+tr" (HTML (Faceted Browsing Links)), "application/vnd.ms-excel"
55#      "application/sparql-results+xml" (XML), "application/sparql-results+json", (JSON)
56#      "application/javascript" (Javascript), "text/turtle" (Turtle), "application/rdf+xml" (RDF/XML)
57#      "text/plain" (N-Triples), "text/csv" (CSV), "text/tab-separated-values" (TSV)
58#    * Parameter value, like indirectly:
59#      "HTML" (alias text/html), "JSON" (alias application/sparql-results+json), "XML" (alias application/sparql-results+xml), "TURTLE" (alias text/rdf+n3), JavaScript (alias application/javascript)
60#       See  <http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/VOSSparqlProtocol#Additional HTTP Response Formats -- SELECT>
61#
62#      For a SELECT query type, the default return mimetype (if Accept: */* is sent) is application/sparql-results+xml
63#      For a ASK query type, the default return mimetype (if Accept: */* is sent) is text/html
64#      For a CONSTRUCT query type, the default return mimetype (if Accept: */* is sent) is text/turtle
65#      For a DESCRIBE query type, the default return mimetype (if Accept: */* is sent) is text/turtle
66#
67################################################################################
68#
69#  - Fuseki (formerly there was Joseki) <https://jena.apache.org/documentation/serving_data/>
70#    * Uses: Parameters AND Content Negotiation
71#    * Parameter key: "format" or "output"
72#    * JSON-LD (application/ld+json): supported (in CONSTRUCT and DESCRIBE)
73#
74#    * Parameter key: "format" or "output"
75#      See Fuseki 1: https://github.com/apache/jena/blob/master/jena-fuseki1/src/main/java/org/apache/jena/fuseki/HttpNames.java
76#      See Fuseki 2: https://github.com/apache/jena/blob/master/jena-arq/src/main/java/org/apache/jena/riot/web/HttpNames.java
77#    * Fuseki 1 - Short names for "output=" : "json", "xml", "sparql", "text", "csv", "tsv", "thrift"
78#      See <https://github.com/apache/jena/blob/master/jena-fuseki1/src/main/java/org/apache/jena/fuseki/servlets/ResponseResultSet.java>
79#    * Fuseki 2 - Short names for "output=" : "json", "xml", "sparql", "text", "csv", "tsv", "thrift"
80#      See <https://github.com/apache/jena/blob/master/jena-fuseki2/jena-fuseki-core/src/main/java/org/apache/jena/fuseki/servlets/ResponseResultSet.java>
81#      If a non-expected short name is used, the server returns an "Error 400: Can't determine output serialization"
#      Valid alias for SELECT and ASK: "json", "xml", "csv", "tsv"
83#      Valid alias for DESCRIBE and CONSTRUCT: "json" (alias for json-ld ONLY in Fuseki2), "xml"
84#      Valid mimetype for DESCRIBE and CONSTRUCT: "application/ld+json"
85#      Default return mimetypes: For a SELECT and ASK query types, the default return mimetype (if Accept: */* is sent) is application/sparql-results+json
#      Default return mimetypes: For a DESCRIBE and CONSTRUCT query types, the default return mimetype (if Accept: */* is sent) is text/turtle
87#      In case of a bad formed query, Fuseki1 returns 200 instead of 400.
88#
89################################################################################
90#
91#  - Eclipse RDF4J <http://rdf4j.org/>
92#    * Formerly known as OpenRDF Sesame
93#    * Uses: ONLY Content Negotiation
94#    * See <https://rdf4j.eclipse.org/documentation/rest-api/#the-query-operation>
95#    * See <https://rdf4j.eclipse.org/documentation/rest-api/#content-types>
96#    * Parameter: If an unexpected parameter is used, the server ignores it.
97#
98#    ** SELECT
99#    *** application/sparql-results+xml (DEFAULT if Accept: */* is sent))
100#    *** application/sparql-results+json (also application/json)
101#    *** text/csv
102#    *** text/tab-separated-values
103#    *** Other values: application/x-binary-rdf-results-table
104#
105#    ** ASK
106#    *** application/sparql-results+xml (DEFAULT if Accept: */* is sent))
107#    *** application/sparql-results+json
108#    *** Other values: text/boolean
109#    *** Not supported: text/csv
110#    *** Not supported: text/tab-separated-values
111#
112#    ** CONSTRUCT
113#    *** application/rdf+xml
114#    *** application/n-triples (DEFAULT if Accept: */* is sent)
115#    *** text/turtle
116#    *** text/n3
117#    *** application/ld+json
118#    *** Other acceptable values: application/n-quads, application/rdf+json, application/trig, application/trix, application/x-binary-rdf
119#    *** text/plain (returns application/n-triples)
120#    *** text/rdf+n3 (returns text/n3)
121#    *** text/x-nquads (returns application/n-quads)
122#
123#    ** DESCRIBE
124#    *** application/rdf+xml
125#    *** application/n-triples (DEFAULT if Accept: */* is sent)
126#    *** text/turtle
127#    *** text/n3
128#    *** application/ld+json
129#    *** Other acceptable values: application/n-quads, application/rdf+json, application/trig, application/trix, application/x-binary-rdf
130#    *** text/plain (returns application/n-triples)
131#    *** text/rdf+n3 (returns text/n3)
132#    *** text/x-nquads (returns application/n-quads)
133#
134#      Default return mimetypes: For a SELECT and ASK query types, the default return mimetype (if Accept: */* is sent) is application/sparql-results+xml
#      Default return mimetypes: For a DESCRIBE and CONSTRUCT query types, the default return mimetype (if Accept: */* is sent) is application/n-triples
136#
137#
138################################################################################
139#
140#  - RASQAL <http://librdf.org/rasqal/>
141#    * Parameter key: "results"
142#    * Uses roqet as RDF query utility
143#      For variable bindings, the values of FORMAT vary upon what Rasqal supports but include simple
144#      for a simple text format (default), xml for the SPARQL Query Results XML format, csv for SPARQL CSV,
145#      tsv for SPARQL TSV, rdfxml and turtle for RDF syntax formats, and json for a JSON version of the results.
146#
147#      For RDF graph results, the values of FORMAT are ntriples (N-Triples, default),
148#      rdfxml-abbrev (RDF/XML Abbreviated), rdfxml (RDF/XML), turtle (Turtle),
149#      json (RDF/JSON resource centric), json-triples (RDF/JSON triples) or
150#      rss-1.0 (RSS 1.0, also an RDF/XML syntax).
151#
152#      See <http://librdf.org/rasqal/roqet.html>
153#
154################################################################################
155#
156#  - Marklogic <http://marklogic.com>
157#    * Uses content negotiation (no URL parameters).
158#    * You can use following methods to query triples <https://docs.marklogic.com/guide/semantics/semantic-searches#chapter>:
159#      - SPARQL mode in Query Console. For details, see Querying Triples with SPARQL
160#      - XQuery using the semantics functions, and Search API, or a combination of XQuery and SPARQL. For details, see Querying Triples with XQuery or JavaScript.
161#      - HTTP via a SPARQL endpoint. For details, see Using Semantics with the REST Client API.
162#    * Formats are specified as part of the HTTP Accept headers of the REST request. <https://docs.marklogic.com/guide/semantics/REST#id_92428>
163#      - When you query the SPARQL endpoint with REST Client APIs, you can specify the result output format.  <https://docs.marklogic.com/guide/semantics/REST#id_54258>
164#        The response type format depends on the type of query and the MIME type in the HTTP Accept header.
165#      - This table describes the MIME types and Accept Header/Output formats (MIME type) for different types of SPARQL queries. See <https://docs.marklogic.com/guide/semantics/REST#id_54258> and <https://docs.marklogic.com/guide/semantics/loading#id_70682>
166#        SELECT "application/sparql-results+xml", "application/sparql-results+json", "text/html", "text/csv"
167#        CONSTRUCT or DESCRIBE "application/n-triples", "application/rdf+json", "application/rdf+xml", "text/turtle", "text/n3", "application/n-quads", "application/trig"
168#        ASK queries return a boolean (true or false).
169#
170################################################################################
171#
172#  - AllegroGraph <https://franz.com/agraph/allegrograph/>
173#    * Uses only content negotiation (no URL parameters).
174#    * The server always looks at the Accept header of a request, and tries to
175#      generate a response in the format that the client asks for. If this fails,
176#      a 406 response is returned. When no Accept, or an Accept of */* is specified,
177#      the server prefers text/plain, in order to make it easy to explore the interface from a web browser.
178#    * Accept header expected (values returned by server when a wrong header is sent):
179#    ** SELECT
180#    *** application/sparql-results+xml (DEFAULT if Accept: */* is sent)
181#    *** application/sparql-results+json (and application/json)
182#    *** text/csv
183#    *** text/tab-separated-values
184#    *** OTHERS: application/sparql-results+ttl, text/integer, application/x-lisp-structured-expression, text/table, application/processed-csv, text/simple-csv, application/x-direct-upis
185#
186#    ** ASK
187#    *** application/sparql-results+xml (DEFAULT if Accept: */* is sent)
188#    *** application/sparql-results+json (and application/json)
189#    *** Not supported: text/csv
190#    *** Not supported: text/tab-separated-values
191#
192#    ** CONSTRUCT
193#    *** application/rdf+xml (DEFAULT if Accept: */* is sent)
194#    *** text/rdf+n3
195#    *** OTHERS: text/integer, application/json, text/plain, text/x-nquads, application/trix, text/table, application/x-direct-upis
196#
197#    ** DESCRIBE
198#    *** application/rdf+xml (DEFAULT if Accept: */* is sent)
199#    *** text/rdf+n3
200#
201#      See <https://franz.com/agraph/support/documentation/current/http-protocol.html>
202#
203################################################################################
204#
205#  - 4store. Code repository <https://github.com/4store/4store> documentation <https://4store.danielknoell.de/trac/wiki/SparqlServer/>
206#    * Uses: Parameters AND Content Negotiation
207#    * Parameter key: "output"
208#    * Parameter value: alias. If an unexpected alias is used, the server is not working properly
209#    * JSON-LD: NOT supported
210#
211#    ** SELECT
212#    *** application/sparql-results+xml (alias xml) (DEFAULT if Accept: */* is sent))
213#    *** application/sparql-results+json or application/json (alias json)
214#    *** text/csv (alias csv)
215#    *** text/tab-separated-values (alias tsv). Returns "text/plain" in GET.
216#    *** Other values: text/plain, application/n-triples
217#
218#    ** ASK
219#    *** application/sparql-results+xml (alias xml) (DEFAULT if Accept: */* is sent))
220#    *** application/sparql-results+json or application/json (alias json)
221#    *** text/csv (alias csv)
222#    *** text/tab-separated-values (alias tsv). Returns "text/plain" in GET.
223#    *** Other values: text/plain, application/n-triples
224#
225#    ** CONSTRUCT
226#    *** application/rdf+xml (alias xml) (DEFAULT if Accept: */* is sent)
227#    *** text/turtle (alias "text")
228#
229#    ** DESCRIBE
230#    *** application/rdf+xml (alias xml) (DEFAULT if Accept: */* is sent)
231#    *** text/turtle (alias "text")
232#
#      Valid alias for SELECT and ASK: "json", "xml", "csv", "tsv" (also "text" and "ascii")
234#      Valid alias for DESCRIBE and CONSTRUCT: "xml", "text" (for turtle)
235#      Default return mimetypes: For a SELECT and ASK query types, the default return mimetype (if Accept: */* is sent) is application/sparql-results+xml
#      Default return mimetypes: For a DESCRIBE and CONSTRUCT query types, the default return mimetype (if Accept: */* is sent) is application/rdf+xml
237#
238#
239################################################################################
240#
241#  - Blazegraph <https://www.blazegraph.com/> & NanoSparqlServer <https://wiki.blazegraph.com/wiki/index.php/NanoSparqlServer> <https://wiki.blazegraph.com/wiki/index.php/REST_API#SPARQL_End_Point>
242#    * Formerly known as Bigdata
243#    * Uses: Parameters AND Content Negotiation
244#    * Parameter key: "format" (available since version 1.4.0). Setting this parameter will override any Accept Header that is present. <https://wiki.blazegraph.com/wiki/index.php/REST_API#GET_or_POST>
245#    * Parameter value: alias. If an unexpected alias is used, the server is not working properly
246#
247#    ** SELECT
248#    *** application/sparql-results+xml (alias xml) (DEFAULT if Accept: */* is sent))
249#    *** application/sparql-results+json or application/json (alias json)
250#    *** text/csv
251#    *** text/tab-separated-values
252#    *** Other values: application/x-binary-rdf-results-table
253#
254#    ** ASK
255#    *** application/sparql-results+xml (alias xml) (DEFAULT if Accept: */* is sent))
256#    *** application/sparql-results+json or application/json (alias json)
257#
258#    ** CONSTRUCT
259#    *** application/rdf+xml (alias xml) (DEFAULT if Accept: */* is sent)
260#    *** text/turtle (returns text/n3)
261#    *** text/n3
262#
263#    ** DESCRIBE
264#    *** application/rdf+xml (alias xml) (DEFAULT if Accept: */* is sent)
265#    *** text/turtle (returns text/n3)
266#    *** text/n3
267#
268#      Valid alias for SELECT and ASK: "xml", "json"
269#      Valid alias for DESCRIBE and CONSTRUCT: "xml", "json" (but it returns unexpected "application/sparql-results+json")
270#      Default return mimetypes: For a SELECT and ASK query types, the default return mimetype (if Accept: */* is sent) is application/sparql-results+xml
#      Default return mimetypes: For a DESCRIBE and CONSTRUCT query types, the default return mimetype (if Accept: */* is sent) is application/rdf+xml
272#
273################################################################################
274#
275#  - GraphDB <http://graphdb.ontotext.com/> <http://graphdb.ontotext.com/documentation/free/>
276#    * Formerly known as OWLIM (OWLIM-Lite, OWLIM-SE)
277#    * Uses: Only Content Negotiation.
278#    * If the Accept value is not within the expected ones, the server returns a 406 "No acceptable file format found."
279#
280#    ** SELECT
281#    *** DEFAULT (if Accept: */* is sent): text/csv
282#    *** application/sparql-results+xml, application/xml (.srx file)
283#    *** application/sparql-results+json, application/json (.srj file)
284#    *** text/csv (DEFAULT if Accept: */* is sent)
285#    *** text/tab-separated-values
286#
287#    ** ASK
288#    *** DEFAULT (if Accept: */* is sent): application/sparql-results+json
289#    *** application/sparql-results+xml, application/xml (.srx file)
290#    *** application/sparql-results+json (DEFAULT if Accept: */* is sent), application/json (.srj file)
291#    *** NOT supported: text/csv, text/tab-separated-values
292#
293#    ** CONSTRUCT
294#    *** DEFAULT (if Accept: */* is sent): application/n-triples
295#    *** application/rdf+xml, application/xml (.rdf file)
296#    *** text/turtle (.ttl file)
297#    *** application/n-triples (.nt file) (DEFAULT if Accept: */* is sent)
298#    *** text/n3, text/rdf+n3 (.n3 file)
299#    *** application/ld+json (.jsonld file)
300#
301#    ** DESCRIBE
302#    *** DEFAULT (if Accept: */* is sent): application/n-triples
303#    *** application/rdf+xml, application/xml (.rdf file)
304#    *** text/turtle (.ttl file)
305#    *** application/n-triples (.nt file) (DEFAULT if Accept: */* is sent)
306#    *** text/n3, text/rdf+n3 (.n3 file)
307#    *** application/ld+json (.jsonld file)
308#
309################################################################################
310#
311#  - Stardog <https://www.stardog.com> <https://www.stardog.com/docs/#_http_headers_content_type_accept> (the doc looks outdated)
312#    * Uses: ONLY Content Negotiation
313#    * Parameter: If an unexpected parameter is used, the server ignores it.
314#
315#    ** SELECT
316#    *** application/sparql-results+xml (DEFAULT if Accept: */* is sent))
317#    *** application/sparql-results+json
318#    *** text/csv
319#    *** text/tab-separated-values
320#    *** Other values: application/x-binary-rdf-results-table
321#
322#    ** ASK
323#    *** application/sparql-results+xml (DEFAULT if Accept: */* is sent))
324#    *** application/sparql-results+json
325#    *** Other values: text/boolean
326#    *** Not supported: text/csv
327#    *** Not supported: text/tab-separated-values
328#
329#    ** CONSTRUCT
330#    *** application/rdf+xml
331#    *** text/turtle (DEFAULT if Accept: */* is sent)
332#    *** text/n3
333#    *** application/ld+json
334#    *** Other acceptable values: application/n-triples, application/x-turtle, application/trig, application/trix, application/n-quads
335#
336#    ** DESCRIBE
337#    *** application/rdf+xml
338#    *** text/turtle (DEFAULT if Accept: */* is sent)
339#    *** text/n3
340#    *** application/ld+json
341#    *** Other acceptable values: application/n-triples, application/x-turtle, application/trig, application/trix, application/n-quads
342#
343#      Default return mimetypes: For a SELECT and ASK query types, the default return mimetype (if Accept: */* is sent) is application/sparql-results+xml
#      Default return mimetypes: For a DESCRIBE and CONSTRUCT query types, the default return mimetype (if Accept: */* is sent) is text/turtle
345#
346################################################################################
347
# Return-format aliases (short names accepted wherever a return format is expected)

XML = "xml"
"""Alias selecting ``XML`` as return format (``SPARQL Query Results XML`` format or ``RDF/XML``, depending on the query type). **This is the default**."""
JSON = "json"
"""Alias selecting ``JSON`` as return format."""
JSONLD = "json-ld"
"""Alias selecting ``JSON-LD`` as return format."""
TURTLE = "turtle"
"""Alias selecting ``Turtle`` as return format."""
N3 = "n3"
"""Alias selecting ``N3`` as return format (for most of the SPARQL services this is equivalent to Turtle)."""
RDF = "rdf"
"""Alias selecting an ``RDF Graph`` as return value."""
RDFXML = "rdf+xml"
"""Alias selecting ``RDF/XML`` explicitly as return format."""
CSV = "csv"
"""Alias selecting ``CSV`` as return format."""
TSV = "tsv"
"""Alias selecting ``TSV`` as return format."""

# Formats accepted out of the box. JSONLD is deliberately not listed here.
_allowedFormats = [
    JSON,
    XML,
    TURTLE,
    N3,
    RDF,
    RDFXML,
    CSV,
    TSV,
]
369
# HTTP verbs that can be used to send the request
GET = "GET"
"""Selects the HTTP ``GET`` method. **This is the default**."""
POST = "POST"
"""Selects the HTTP ``POST`` method."""
_allowedRequests = [
    POST,
    GET,
]
376
# HTTP authentication schemes that can be selected
BASIC = "BASIC"
"""Selects the ``BASIC`` HTTP Authentication method."""
DIGEST = "DIGEST"
"""Selects the ``DIGEST`` HTTP Authentication method."""
_allowedAuth = [
    BASIC,
    DIGEST,
]
383
# SPARQL/SPARUL query types (aka SPARQL Query forms) known to the wrapper
SELECT = "SELECT"
"""Query type ``SELECT``. Usually determined automatically."""
CONSTRUCT = "CONSTRUCT"
"""Query type ``CONSTRUCT``. Usually determined automatically."""
ASK = "ASK"
"""Query type ``ASK``. Usually determined automatically."""
DESCRIBE = "DESCRIBE"
"""Query type ``DESCRIBE``. Usually determined automatically."""
INSERT = "INSERT"
"""Query type ``INSERT``. Usually determined automatically."""
DELETE = "DELETE"
"""Query type ``DELETE``. Usually determined automatically."""
CREATE = "CREATE"
"""Query type ``CREATE``. Usually determined automatically."""
CLEAR = "CLEAR"
"""Query type ``CLEAR``. Usually determined automatically."""
DROP = "DROP"
"""Query type ``DROP``. Usually determined automatically."""
LOAD = "LOAD"
"""Query type ``LOAD``. Usually determined automatically."""
COPY = "COPY"
"""Query type ``COPY``. Usually determined automatically."""
MOVE = "MOVE"
"""Query type ``MOVE``. Usually determined automatically."""
ADD = "ADD"
"""Query type ``ADD``. Usually determined automatically."""

_allowedQueryTypes = [
    SELECT,
    CONSTRUCT,
    ASK,
    DESCRIBE,
    INSERT,
    DELETE,
    CREATE,
    CLEAR,
    DROP,
    LOAD,
    COPY,
    MOVE,
    ADD,
]
413
# Ways in which the request payload can be encoded
URLENCODED = "urlencoded"
"""Selects **URL encode** as the encoding method for the request. Usually determined automatically."""
POSTDIRECTLY = "postdirectly"
"""Selects **POST directly** as the encoding method for the request. Usually determined automatically."""
_REQUEST_METHODS = [
    URLENCODED,
    POSTDIRECTLY,
]
420
# MIME types of the outputs that the local script is able to convert.
# Setting the return format alone is not enough, because implementations are
# still not fully consistent about the MIME types they send back, so several
# alternatives are listed per format. Historical examples: Joseki returned
# application/javascript instead of the required sparql-results+json (reported
# in June 2007 as fixed for later releases), and text/n3 / text/turtle were
# only proposed MIME types, not yet widely used, when this was written.
_SPARQL_DEFAULT = [
    "application/sparql-results+xml",
    "application/rdf+xml",
    "*/*",
]
_SPARQL_XML = ["application/sparql-results+xml"]
_SPARQL_JSON = [
    "application/sparql-results+json",
    "application/json",
    "text/javascript",
    "application/javascript",  # VIVO server returns "application/javascript"
]
_RDF_XML = ["application/rdf+xml"]
_RDF_TURTLE = [
    "application/turtle",
    "text/turtle",
]
# N3 accepts everything Turtle accepts, plus the N3/N-Triples specific types.
_RDF_N3 = _RDF_TURTLE + [
    "text/rdf+n3",
    "application/n-triples",
    "application/n3",
    "text/n3",
]
_RDF_JSONLD = [
    "application/ld+json",
    "application/x-json+ld",
]
_CSV = ["text/csv"]
_TSV = ["text/tab-separated-values"]
_XML = ["application/xml"]
_ALL = ["*/*"]
_RDF_POSSIBLE = _RDF_XML + _RDF_N3 + _XML

_SPARQL_PARAMS = ["query"]
443
# JSON-LD support is optional: it is only registered when ``rdflib_jsonld``
# can be imported. When the package is missing, nothing is reported and the
# JSON-LD format simply stays out of the allowed formats.
try:
    import rdflib_jsonld
except ImportError:
    pass
else:
    _allowedFormats.append(JSONLD)
    _RDF_POSSIBLE = _RDF_POSSIBLE + _RDF_JSONLD
451
# This is very ugly. The fact is that the key of the request parameter used to choose the output
# format is not defined: Virtuoso uses 'format', Joseki uses 'output', Rasqal seems to use "results",
# etc. Lee Feigenbaum reported that Virtuoso also understands 'output' these days. There is no info
# about the others yet, so for the time being the general mechanism is kept. Hopefully, in a future
# release, it can be dropped. However, these processors are (hopefully) oblivious to the parameters
# they do not understand. So: just repeat all possibilities in the final URI. UGLY!!!!!!!
# NOTE(review): the original remark stated that 'format' had been removed, yet the list below
# still contains it -- confirm which one is intended.
_returnFormatSetting = ["format", "output", "results"]
459
460#######################################################################################################
461
462
463class SPARQLWrapper(object):
464    """
465    Wrapper around an online access to a SPARQL Web entry point.
466
467    The same class instance can be reused for subsequent queries. The values of the base Graph URI, return formats, etc,
468    are retained from one query to the next (in other words, only the query string changes). The instance can also be
469    reset to its initial values using the :meth:`resetQuery` method.
470
471    :ivar endpoint: SPARQL endpoint's URI.
472    :vartype endpoint: string
473    :ivar updateEndpoint: SPARQL endpoint's URI for SPARQL Update operations (if it's a different one). The **default** value is ``None``.
474    :vartype updateEndpoint: string
475    :ivar agent: The User-Agent for the HTTP request header. The **default** value is an autogenerated string using the SPARQLWrapper version code.
476    :vartype agent: string
477    :ivar _defaultGraph: URI for the default graph. The value can be set either via an explicit call :func:`addParameter("default-graph-uri", uri)<addParameter>` or as part of the query string. The **default** value is ``None``.
478    :vartype _defaultGraph: string
479    :ivar user: The username of the credentials for querying the current endpoint. The value can be set an explicit call :func:`setCredentials`. The **default** value is ``None``.
480    :vartype user: string
481    :ivar passwd: The password of the credentials for querying the current endpoint. The value can be set an explicit call :func:`setCredentials`. The **default** value is ``None``.
482    :vartype passwd: string
483    :ivar http_auth: HTTP Authentication type. The **default** value is :data:`BASIC`. Possible values are :data:`BASIC` or :data:`DIGEST`. It is used only in case the credentials are set.
484    :vartype http_auth: string
485    :ivar onlyConneg: Option for allowing (or not) **only** HTTP Content Negotiation (so dismiss the use of HTTP parameters). The default value is ``False``.
486    :vartype onlyConneg: boolean
487    :ivar customHttpHeaders: Custom HTTP Headers to be included in the request. It is a dictionary where keys are the header field and values are the header values. **Important**: These headers override previous values (including ``Content-Type``, ``User-Agent``, ``Accept`` and ``Authorization`` if they are present).
488    :vartype customHttpHeaders: dict
489    :ivar timeout: The timeout (in seconds) to use for querying the endpoint.
490    :vartype timeout: int
491    :ivar queryString: The SPARQL query text.
492    :vartype queryString: string
493    :ivar queryType: The type of SPARQL query (aka SPARQL query form), like :data:`CONSTRUCT`, :data:`SELECT`, :data:`ASK`, :data:`DESCRIBE`, :data:`INSERT`, :data:`DELETE`, :data:`CREATE`, :data:`CLEAR`, :data:`DROP`, :data:`LOAD`, :data:`COPY`, :data:`MOVE` or :data:`ADD` (constants in this module).
494    :vartype queryType: string
495    :ivar returnFormat: The return format.\
496    No local check is done, so the parameter is simply sent to the endpoint. Eg, if the value is set to :data:`JSON` and a construct query is issued, it is up to the endpoint to react or not, this wrapper does not check.\
497    The possible values are :data:`JSON`, :data:`XML`, :data:`TURTLE`, :data:`N3`, :data:`RDF`, :data:`RDFXML`, :data:`CSV`, :data:`TSV`, :data:`JSONLD` (constants in this module).\
498    The **default** value is :data:`XML`.
499    :vartype returnFormat: string
500    :ivar requestMethod: The request method for query or update operations. The possibles values are URL-encoded (:data:`URLENCODED`) or POST directly (:data:`POSTDIRECTLY`).
501    :vartype requestMethod: string
502    :ivar method: The invocation method (HTTP verb).  The **default** value is :data:`GET`, but it can be set to :data:`POST`.
503    :vartype method: string
504    :ivar parameters: The parameters of the request (key/value pairs in a dictionary).
505    :vartype parameters: dict
506    :ivar _defaultReturnFormat: The default return format. It is used in case the same class instance is reused for subsequent queries.
507    :vartype _defaultReturnFormat: string
508
509    :cvar prefix_pattern: regular expression used to remove base/prefixes in the process of determining the query type.
510    :vartype prefix_pattern: :class:`re.RegexObject`, a compiled regular expression. See the :mod:`re` module of Python
511    :cvar pattern: regular expression used to determine whether a query (without base/prefixes) is of type :data:`CONSTRUCT`, :data:`SELECT`, :data:`ASK`, :data:`DESCRIBE`, :data:`INSERT`, :data:`DELETE`, :data:`CREATE`, :data:`CLEAR`, :data:`DROP`, :data:`LOAD`, :data:`COPY`, :data:`MOVE` or :data:`ADD`.
512    :vartype pattern: :class:`re.RegexObject`, a compiled regular expression. See the :mod:`re` module of Python
513    :cvar comments_pattern: regular expression used to remove comments from a query.
514    :vartype comments_pattern: :class:`re.RegexObject`, a compiled regular expression. See the :mod:`re` module of Python
515
516    """
    # Matches a run of zero or more prologue declarations, each being either ``BASE <...>`` or
    # ``PREFIX name: <...>``, with optional surrounding whitespace. It is compiled without flags, so the
    # BASE/PREFIX keywords are matched case-sensitively, and the outer ``*`` lets it match an empty string.
    prefix_pattern = re.compile(r"((?P<base>(\s*BASE\s*<.*?>)\s*)|(?P<prefixes>(\s*PREFIX\s+.+:\s*<.*?>)\s*))*")
    # Maybe the future name could be queryType_pattern
    # Matches one of the query form keywords, case-insensitively (re.IGNORECASE), and exposes it as the
    # named group ``queryType``.
    pattern = re.compile(r"(?P<queryType>(CONSTRUCT|SELECT|ASK|DESCRIBE|INSERT|DELETE|CREATE|CLEAR|DROP|LOAD|COPY|MOVE|ADD))", re.VERBOSE | re.IGNORECASE)
    # Matches a comment line: start of string or a newline, optional whitespace, ``#``, and then everything
    # up to and including the next newline (so a trailing comment without a final newline is not matched).
    comments_pattern = re.compile(r"(^|\n)\s*#.*?\n")
521
522    def __init__(self, endpoint, updateEndpoint=None, returnFormat=XML, defaultGraph=None, agent=__agent__):
523        """
524        Class encapsulating a full SPARQL call.
525
526        :param endpoint: SPARQL endpoint's URI.
527        :type endpoint: string
528        :param updateEndpoint: SPARQL endpoint's URI for update operations (if it's a different one). The **default** value is ``None``.
529        :type updateEndpoint: string
530        :param returnFormat: The return format.\
531        No local check is done, so the parameter is simply sent to the endpoint. Eg, if the value is set to :data:`JSON` and a construct query is issued, it is up to the endpoint to react or not, this wrapper does not check.\
532        The possible values are :data:`JSON`, :data:`XML`, :data:`TURTLE`, :data:`N3`, :data:`RDF`, :data:`RDFXML`, :data:`CSV`, :data:`TSV`, :data:`JSONLD` (constants in this module).\
533        The **default** value is :data:`XML`.
534        :param defaultGraph: URI for the default graph. The value can be set either via an explicit call :func:`addParameter("default-graph-uri", uri)<addParameter>` or as part of the query string. The **default** value is ``None``.
535        :type defaultGraph: string
536        :param agent: The User-Agent for the HTTP request header. The **default** value is an autogenerated string using the SPARQLWrapper version number.
537        :type agent: string
538        """
539        self.endpoint = endpoint
540        self.updateEndpoint = updateEndpoint if updateEndpoint else endpoint
541        self.agent = agent
542        self.user = None
543        self.passwd = None
544        self.http_auth = BASIC
545        self._defaultGraph = defaultGraph
546        self.onlyConneg = False # Only Content Negotiation
547        self.customHttpHeaders = {}
548
549        if returnFormat in _allowedFormats:
550            self._defaultReturnFormat = returnFormat
551        else:
552            self._defaultReturnFormat = XML
553
554        self.resetQuery()
555
556    def resetQuery(self):
557        """Reset the query, ie, return format, method, query, default or named graph settings, etc,
558        are reset to their default values. This includes the default values for parameters, method, timeout or requestMethod.
559        """
560        self.parameters = {}
561        if self._defaultGraph:
562            self.addParameter("default-graph-uri", self._defaultGraph)
563        self.returnFormat = self._defaultReturnFormat
564        self.method = GET
565        self.setQuery("""SELECT * WHERE{ ?s ?p ?o }""")
566        self.timeout = None
567        self.requestMethod = URLENCODED
568
569
570    def setReturnFormat(self, format):
571        """Set the return format. If the one set is not an allowed value, the setting is ignored.
572
573        :param format: Possible values are :data:`JSON`, :data:`XML`, :data:`TURTLE`, :data:`N3`, :data:`RDF`, :data:`RDFXML`, :data:`CSV`, :data:`TSV`, :data:`JSONLD` (constants in this module). All other cases are ignored.
574        :type format: string
575        :raises ValueError: If :data:`JSONLD` is tried to set and the current instance does not support ``JSON-LD``.
576        """
577        if format in _allowedFormats:
578            self.returnFormat = format
579        elif format == JSONLD:
580            raise ValueError("Current instance does not support JSON-LD; you might want to install the rdflib-jsonld package.")
581        else:
582            warnings.warn("Ignore format '%s'; current instance supports: %s." %(format, ", ".join(_allowedFormats)), SyntaxWarning)
583
584    def supportsReturnFormat(self, format):
585        """Check if a return format is supported.
586
587        :param format: Possible values are :data:`JSON`, :data:`XML`, :data:`TURTLE`, :data:`N3`, :data:`RDF`, :data:`RDFXML`, :data:`CSV`, :data:`TSV`, :data:`JSONLD` (constants in this module). All other cases are ignored.
588        :type format: string
589        :return: Returns ``True`` if the return format is supported, otherwise ``False``.
590        :rtype: bool
591        """
592        return (format in _allowedFormats)
593
594    def setTimeout(self, timeout):
595        """Set the timeout (in seconds) to use for querying the endpoint.
596
597        :param timeout: Timeout in seconds.
598        :type timeout: int
599        """
600        self.timeout = int(timeout)
601
602    def setOnlyConneg(self, onlyConneg):
603        """Set this option for allowing (or not) only HTTP Content Negotiation (so dismiss the use of HTTP parameters).
604
605        .. versionadded:: 1.8.1
606
607        :param onlyConneg: ``True`` if **only** HTTP Content Negotiation is allowed; ``False`` if HTTP parameters are used.
608        :type onlyConneg: bool
609        """
610        self.onlyConneg = onlyConneg
611
612    def setRequestMethod(self, method):
613        """Set the internal method to use to perform the request for query or
614        update operations, either URL-encoded (:data:`URLENCODED`) or
615        POST directly (:data:`POSTDIRECTLY`).
616        Further details at `query operation in SPARQL <http://www.w3.org/TR/sparql11-protocol/#query-operation>`_
617        and `update operation in SPARQL Update <http://www.w3.org/TR/sparql11-protocol/#update-operation>`_.
618
619        :param method: Possible values are :data:`URLENCODED` (URL-encoded) or :data:`POSTDIRECTLY` (POST directly). All other cases are ignored.
620        :type method: string
621        """
622        if method in _REQUEST_METHODS:
623            self.requestMethod = method
624        else:
625            warnings.warn("invalid update method '%s'" % method, RuntimeWarning)
626
627    def addDefaultGraph(self, uri):
628        """
629            Add a default graph URI.
630
631            .. deprecated:: 1.6.0 Use :func:`addParameter("default-graph-uri", uri)<addParameter>` instead of this method.
632
633            :param uri: URI of the default graph.
634            :type uri: string
635        """
636        self.addParameter("default-graph-uri", uri)
637
638    def addNamedGraph(self, uri):
639        """
640            Add a named graph URI.
641
642            .. deprecated:: 1.6.0 Use :func:`addParameter("named-graph-uri", uri)<addParameter>` instead of this method.
643
644            :param uri: URI of the named graph.
645            :type uri: string
646        """
647        self.addParameter("named-graph-uri", uri)
648
649    def addExtraURITag(self, key, value):
650        """
651            Some SPARQL endpoints require extra key value pairs.
652            E.g., in virtuoso, one would add ``should-sponge=soft`` to the query forcing
653            virtuoso to retrieve graphs that are not stored in its local database.
654            Alias of :func:`addParameter` method.
655
656            .. deprecated:: 1.6.0 Use :func:`addParameter(key, value)<addParameter>` instead of this method
657
658            :param key: key of the query part.
659            :type key: string
660            :param value: value of the query part.
661            :type value: string
662        """
663        self.addParameter(key, value)
664
665    def addCustomParameter(self, name, value):
666        """
667            Method is kept for backwards compatibility. Historically, it "replaces" parameters instead of adding.
668
669            .. deprecated:: 1.6.0 Use :func:`addParameter(key, value)<addParameter>` instead of this method
670
671            :param name: name.
672            :type name: string
673            :param value: value.
674            :type value: string
675            :return: Returns ``True`` if the adding has been accomplished, otherwise ``False``.
676            :rtype: bool
677        """
678        self.clearParameter(name)
679        return self.addParameter(name, value)
680
681    def addParameter(self, name, value):
682        """
683            Some SPARQL endpoints allow extra key value pairs.
684            E.g., in virtuoso, one would add ``should-sponge=soft`` to the query forcing
685            virtuoso to retrieve graphs that are not stored in its local database.
686            If the parameter :attr:`query` is tried to be set, this intent is dismissed.
687            Returns a boolean indicating if the set has been accomplished.
688
689            :param name: name.
690            :type name: string
691            :param value: value.
692            :type value: string
693            :return: Returns ``True`` if the adding has been accomplished, otherwise ``False``.
694            :rtype: bool
695        """
696        if name in _SPARQL_PARAMS:
697            return False
698        else:
699            if name not in self.parameters:
700                self.parameters[name] = []
701            self.parameters[name].append(value)
702            return True
703
704    def addCustomHttpHeader(self, httpHeaderName, httpHeaderValue):
705        """
706            Add a custom HTTP header (this method can override all HTTP headers).
707
708            **Important**: Take into account that each previous value for the header field names
709            ``Content-Type``, ``User-Agent``, ``Accept`` and ``Authorization`` would be overriden
710            if the header field name is present as value of the parameter :attr:`httpHeaderName`.
711
712            .. versionadded:: 1.8.2
713
714            :param httpHeaderName: The header field name.
715            :type httpHeaderName: string
716            :param httpHeaderValue: The header field value.
717            :type httpHeaderValue: string
718        """
719        self.customHttpHeaders[httpHeaderName] = httpHeaderValue
720
721    def clearCustomHttpHeader(self, httpHeaderName):
722        """
723            Clear the values of a custom HTTP Header previously set.
724            Returns a boolean indicating if the clearing has been accomplished.
725
726            .. versionadded:: 1.8.2
727
728            :param httpHeaderName: HTTP header name.
729            :type httpHeaderName: string
730            :return: Returns ``True`` if the clearing has been accomplished, otherwise ``False``.
731            :rtype: bool
732        """
733        try:
734            del self.customHttpHeaders[httpHeaderName]
735            return True
736        except KeyError:
737            return False
738
739    def clearParameter(self, name):
740        """
741            Clear the values of a concrete parameter.
742            Returns a boolean indicating if the clearing has been accomplished.
743
744            :param name: name
745            :type name: string
746            :return: Returns ``True`` if the clearing has been accomplished, otherwise ``False``.
747            :rtype: bool
748        """
749        if name in _SPARQL_PARAMS:
750            return False
751        else:
752            try:
753                del self.parameters[name]
754                return True
755            except KeyError:
756                return False
757
758    def setCredentials(self, user, passwd, realm="SPARQL"):
759        """
760            Set the credentials for querying the current endpoint.
761
762            :param user: username.
763            :type user: string
764            :param passwd: password.
765            :type passwd: string
766            :param realm: realm. Only used for :data:`DIGEST` authentication. The **default** value is ``SPARQL``
767            :type realm: string
768
769            .. versionchanged:: 1.8.3
770               Added :attr:`realm` parameter.
771        """
772        self.user = user
773        self.passwd = passwd
774        self.realm = realm
775
776    def setHTTPAuth(self, auth):
777        """
778            Set the HTTP Authentication type. Possible values are :class:`BASIC` or :class:`DIGEST`.
779
780            :param auth: auth type.
781            :type auth: string
782            :raises TypeError: If the :attr:`auth` parameter is not an string.
783            :raises ValueError: If the :attr:`auth` parameter has not one of the valid values: :class:`BASIC` or :class:`DIGEST`.
784        """
785        if not isinstance(auth, str):
786            raise TypeError('setHTTPAuth takes a string')
787        elif auth.upper() in _allowedAuth:
788            self.http_auth = auth.upper()
789        else:
790            valid_types = ", ".join(_allowedAuth)
791            raise ValueError("Value should be one of {0}".format(valid_types))
792
793    def setQuery(self, query):
794        """
795            Set the SPARQL query text.
796
797            .. note::
798              No check is done on the validity of the query
799              (syntax or otherwise) by this module, except for testing the query type (SELECT,
800              ASK, etc). Syntax and validity checking is done by the SPARQL service itself.
801
802            :param query: query text.
803            :type query: string
804            :raises TypeError: If the :attr:`query` parameter is not an unicode-string or utf-8 encoded byte-string.
805        """
806        if sys.version < '3':  # have to write it like this, for 2to3 compatibility
807            if isinstance(query, unicode):
808                pass
809            elif isinstance(query, str):
810                query = query.decode('utf-8')
811            else:
812                raise TypeError('setQuery takes either unicode-strings or utf-8 encoded byte-strings')
813        else:
814            if isinstance(query, str):
815                pass
816            elif isinstance(query, bytes):
817                query = query.decode('utf-8')
818            else:
819                raise TypeError('setQuery takes either unicode-strings or utf-8 encoded byte-strings')
820
821        self.queryString = query
822        self.queryType = self._parseQueryType(query)
823
824    def _parseQueryType(self, query):
825        """
826            Internal method for parsing the SPARQL query and return its type (ie, :data:`SELECT`, :data:`ASK`, etc).
827
828            .. note::
829              The method returns :data:`SELECT` if nothing is specified. This is just to get all other
830              methods running; in fact, this means that the query is erroneous, because the query must be,
831              according to the SPARQL specification. The
832              SPARQL endpoint should raise an exception (via :mod:`urllib`) for such syntax error.
833
834            :param query: query text.
835            :type query: string
836            :return: the type of SPARQL query (aka SPARQL query form).
837            :rtype: string
838        """
839        try:
840            query = query if (isinstance(query, str)) else query.encode('ascii', 'ignore')
841            query = self._cleanComments(query)
842            query_for_queryType = re.sub(self.prefix_pattern, "", query.strip())
843            r_queryType = self.pattern.search(query_for_queryType).group("queryType").upper()
844        except AttributeError:
845            warnings.warn("not detected query type for query '%s'" % query.replace("\n", " "), RuntimeWarning)
846            r_queryType = None
847
848        if r_queryType in _allowedQueryTypes:
849            return r_queryType
850        else:
851            #raise Exception("Illegal SPARQL Query; must be one of SELECT, ASK, DESCRIBE, or CONSTRUCT")
852            warnings.warn("unknown query type '%s'" % r_queryType, RuntimeWarning)
853            return SELECT
854
855    def setMethod(self, method):
856        """Set the invocation method. By default, this is :data:`GET`, but can be set to :data:`POST`.
857
858        :param method: should be either :data:`GET` or :data:`POST`. Other cases are ignored.
859        :type method: string
860        """
861        if method in _allowedRequests:
862            self.method = method
863
864    def setUseKeepAlive(self):
865        """Make :mod:`urllib2` use keep-alive.
866
867        :raises ImportError: when could not be imported ``keepalive.HTTPHandler``.
868        """
869        try:
870            from keepalive import HTTPHandler
871
872            if urllib2._opener and any(isinstance(h, HTTPHandler) for h in urllib2._opener.handlers):
873                # already installed
874                return
875
876            keepalive_handler = HTTPHandler()
877            opener = urllib2.build_opener(keepalive_handler)
878            urllib2.install_opener(opener)
879        except ImportError:
880            warnings.warn("keepalive support not available, so the execution of this method has no effect")
881
882    def isSparqlUpdateRequest(self):
883        """ Returns ``True`` if SPARQLWrapper is configured for executing SPARQL Update request.
884
885        :return: Returns ``True`` if SPARQLWrapper is configured for executing SPARQL Update request.
886        :rtype: bool
887        """
888        return self.queryType in [INSERT, DELETE, CREATE, CLEAR, DROP, LOAD, COPY, MOVE, ADD]
889
890    def isSparqlQueryRequest(self):
891        """ Returns ``True`` if SPARQLWrapper is configured for executing SPARQL Query request.
892
893        :return: Returns ``True`` if SPARQLWrapper is configured for executing SPARQL Query request.
894        :rtype: bool
895        """
896        return not self.isSparqlUpdateRequest()
897
898    def _cleanComments(self, query):
899        """ Internal method for returning the query after all occurrence of singleline comments are removed (issues #32 and #77).
900
901        :param query: The query.
902        :type query: string
903        :return: the query after all occurrence of singleline comments are removed.
904        :rtype: string
905        """
906        return re.sub(self.comments_pattern, "\n\n", query)
907
908    def _getRequestEncodedParameters(self, query=None):
909        """ Internal method for getting the request encoded parameters.
910
911        :param query: a tuple of two items. The first item can be the string \
912        ``query`` (for :data:`SELECT`, :data:`DESCRIBE`, :data:`ASK`, :data:`CONSTRUCT` query) or the string ``update`` \
913        (for SPARQL Update queries, like :data:`DELETE` or :data:`INSERT`). The second item of the tuple \
914        is the query string itself.
915        :type query: tuple
916        :return: the request encoded parameters.
917        :rtype: string
918        """
919        query_parameters = self.parameters.copy()
920
921        # in case of query = tuple("query"/"update", queryString)
922        if query and (isinstance(query, tuple)) and len(query) == 2:
923            query_parameters[query[0]] = [query[1]]
924
925        if not self.isSparqlUpdateRequest():
926            # This is very ugly. The fact is that the key for the choice of the output format is not defined.
927            # Virtuoso uses 'format',sparqler uses 'output'
928            # However, these processors are (hopefully) oblivious to the parameters they do not understand.
929            # So: just repeat all possibilities in the final URI. UGLY!!!!!!!
930            if not self.onlyConneg:
931                for f in _returnFormatSetting:
932                    query_parameters[f] = [self.returnFormat]
933                    # Virtuoso is not supporting a correct Accept header and an unexpected "output"/"format" parameter value. It returns a 406.
934                    # "tsv", "rdf+xml" and "json-ld" are not supported as a correct "output"/"format" parameter value but "text/tab-separated-values" or "application/rdf+xml" are a valid values,
935                    # and there is no problem to send both (4store does not support unexpected values).
936                    if self.returnFormat in [TSV, JSONLD, RDFXML]:
937                        acceptHeader = self._getAcceptHeader() # to obtain the mime-type "text/tab-separated-values" or "application/rdf+xml"
938                        if "*/*" in acceptHeader:
939                            acceptHeader = "" # clear the value in case of "*/*"
940                        query_parameters[f] += [acceptHeader]
941
942        pairs = (
943            "%s=%s" % (
944                urllib.quote_plus(param.encode('UTF-8'), safe='/'),
945                urllib.quote_plus(value.encode('UTF-8'), safe='/')
946            )
947            for param, values in query_parameters.items() for value in values
948        )
949        return '&'.join(pairs)
950
951    def _getAcceptHeader(self):
952        """ Internal method for getting the HTTP Accept Header.
953
954        .. seealso:: `Hypertext Transfer Protocol -- HTTP/1.1 - Header Field Definitions <https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1>`_
955        """
956        if self.queryType in [SELECT, ASK]:
957            if self.returnFormat == XML:
958                acceptHeader = ",".join(_SPARQL_XML)
959            elif self.returnFormat == JSON:
960                acceptHeader = ",".join(_SPARQL_JSON)
961            elif self.returnFormat == CSV: # Allowed for SELECT and ASK (https://www.w3.org/TR/2013/REC-sparql11-protocol-20130321/#query-success) but only described for SELECT (https://www.w3.org/TR/sparql11-results-csv-tsv/)
962                acceptHeader = ",".join(_CSV)
963            elif self.returnFormat == TSV: # Allowed for SELECT and ASK (https://www.w3.org/TR/2013/REC-sparql11-protocol-20130321/#query-success) but only described for SELECT (https://www.w3.org/TR/sparql11-results-csv-tsv/)
964                acceptHeader = ",".join(_TSV)
965            else:
966                acceptHeader = ",".join(_ALL)
967                warnings.warn("Sending Accept header '*/*' because unexpected returned format '%s' in a '%s' SPARQL query form" % (self.returnFormat, self.queryType), RuntimeWarning)
968        elif self.queryType in [CONSTRUCT, DESCRIBE]:
969            if self.returnFormat == TURTLE:
970                acceptHeader = ",".join(_RDF_TURTLE)
971            elif self.returnFormat == N3:
972                acceptHeader = ",".join(_RDF_N3)
973            elif self.returnFormat == XML or self.returnFormat == RDFXML:
974                acceptHeader = ",".join(_RDF_XML)
975            elif self.returnFormat == JSONLD and JSONLD in _allowedFormats:
976                acceptHeader = ",".join(_RDF_JSONLD)
977            else:
978                acceptHeader = ",".join(_ALL)
979                warnings.warn("Sending Accept header '*/*' because unexpected returned format '%s' in a '%s' SPARQL query form" % (self.returnFormat, self.queryType), RuntimeWarning)
980        elif self.queryType in [INSERT, DELETE, CREATE, CLEAR, DROP, LOAD, COPY, MOVE, ADD]:
981            if self.returnFormat == XML:
982                acceptHeader = ",".join(_SPARQL_XML)
983            elif self.returnFormat == JSON:
984                acceptHeader = ",".join(_SPARQL_JSON)
985            else:
986                acceptHeader = ",".join(_ALL)
987        else:
988            acceptHeader = "*/*"
989        return acceptHeader
990
    def _createRequest(self):
        """Internal method to create the request according to the query type, the HTTP method and the request method.

        Update operations are addressed to :attr:`updateEndpoint`, query operations to :attr:`endpoint`.
        The ``User-Agent``, ``Accept`` and (when credentials are set) authentication settings are
        applied afterwards, followed by the custom HTTP headers.

        :raises NotImplementedError: If the HTTP authentification method is not one of the valid values: :data:`BASIC` or :data:`DIGEST`.
        :return: request a :class:`urllib2.Request` object of the :mod:`urllib2` Python library
        """
        request = None

        if self.isSparqlUpdateRequest():
            #protocol details at http://www.w3.org/TR/sparql11-protocol/#update-operation
            uri = self.updateEndpoint

            # Only a warning: the request below still carries a body in both cases.
            if self.method != POST:
                warnings.warn("update operations MUST be done by POST")

            if self.requestMethod == POSTDIRECTLY:
                # The update text is the body; the extra parameters travel in the URL.
                request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters())
                request.add_header("Content-Type", "application/sparql-update")
                request.data = self.queryString.encode('UTF-8')
            else:  # URL-encoded
                # Everything, including the "update" parameter, is sent as a form body.
                request = urllib2.Request(uri)
                request.add_header("Content-Type", "application/x-www-form-urlencoded")
                request.data = self._getRequestEncodedParameters(("update", self.queryString)).encode('ascii')
        else:
            #protocol details at http://www.w3.org/TR/sparql11-protocol/#query-operation
            uri = self.endpoint

            if self.method == POST:
                if self.requestMethod == POSTDIRECTLY:
                    # The query text is the body; the extra parameters travel in the URL.
                    request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters())
                    request.add_header("Content-Type", "application/sparql-query")
                    request.data = self.queryString.encode('UTF-8')
                else:  # URL-encoded
                    request = urllib2.Request(uri)
                    request.add_header("Content-Type", "application/x-www-form-urlencoded")
                    request.data = self._getRequestEncodedParameters(("query", self.queryString)).encode('ascii')
            else:  # GET
                # No body: the query and every parameter are part of the URL.
                request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters(("query", self.queryString)))

        request.add_header("User-Agent", self.agent)
        request.add_header("Accept", self._getAcceptHeader())
        # Authentication is only configured when both a user and a password are set.
        if self.user and self.passwd:
            if self.http_auth == BASIC:
                # Basic auth: base64 of "user:passwd" sent in the Authorization header.
                credentials = "%s:%s" % (self.user, self.passwd)
                request.add_header("Authorization", "Basic %s" % base64.b64encode(credentials.encode('utf-8')).decode('utf-8'))
            elif self.http_auth == DIGEST:
                # Digest auth is delegated to urllib2. NOTE: install_opener() replaces the opener
                # used by urllib2 globally, not only for this request.
                realm = self.realm
                pwd_mgr = urllib2.HTTPPasswordMgr()
                pwd_mgr.add_password(realm, uri, self.user, self.passwd)
                opener = urllib2.build_opener()
                opener.add_handler(urllib2.HTTPDigestAuthHandler(pwd_mgr))
                urllib2.install_opener(opener)
            else:
                valid_types = ", ".join(_allowedAuth)
                raise NotImplementedError("Expecting one of: {0}, but received: {1}".format(valid_types,
                                                                                            self.http_auth))

        # Custom headers are added last, so they replace any header of the same name set above.
        # The header field name is capitalized in the request.add_header method.
        for customHttpHeader in self.customHttpHeaders:
            request.add_header(customHttpHeader, self.customHttpHeaders[customHttpHeader])

        return request
1054
1055    def _query(self):
1056        """Internal method to execute the query. Returns the output of the
1057        :func:`urllib2.urlopen` method of the :mod:`urllib2` Python library
1058
1059        :return: tuples with the raw request plus the expected format.
1060        :raises QueryBadFormed: If the HTTP return code is ``400``.
1061        :raises Unauthorized: If the HTTP return code is ``401``.
1062        :raises EndPointNotFound: If the HTTP return code is ``404``.
1063        :raises URITooLong: If the HTTP return code is ``414``.
1064        :raises EndPointInternalError: If the HTTP return code is ``500``.
1065        :raises urllib2.HTTPError: If the HTTP return code is different to ``400``, ``401``, ``404``, ``414``, ``500``.
1066        """
1067        request = self._createRequest()
1068
1069        try:
1070            if self.timeout:
1071                response = urlopener(request, timeout=self.timeout)
1072            else:
1073                response = urlopener(request)
1074            return response, self.returnFormat
1075        except urllib2.HTTPError as e:
1076            if e.code == 400:
1077                raise QueryBadFormed(e.read())
1078            elif e.code == 404:
1079                raise EndPointNotFound(e.read())
1080            elif e.code == 401:
1081                raise Unauthorized(e.read())
1082            elif e.code == 414:
1083                raise URITooLong(e.read())
1084            elif e.code == 500:
1085                raise EndPointInternalError(e.read())
1086            else:
1087                raise e
1088
1089    def query(self):
1090        """
1091            Execute the query.
1092            Exceptions can be raised if either the URI is wrong or the HTTP sends back an error (this is also the
1093            case when the query is syntactically incorrect, leading to an HTTP error sent back by the SPARQL endpoint).
1094            The usual urllib2 exceptions are raised, which therefore cover possible SPARQL errors, too.
1095
1096            Note that some combinations of return formats and query types may not make sense. For example,
1097            a SELECT query with Turtle response is meaningless (the output of a SELECT is not a Graph), or a CONSTRUCT
1098            query with JSON output may be a problem because, at the moment, there is no accepted JSON serialization
1099            of RDF (let alone one implemented by SPARQL endpoints). In such cases the returned media type of the result is
1100            unpredictable and may differ from one SPARQL endpoint implementation to the other. (Endpoints usually fall
1101            back to one of the "meaningful" formats, but it is up to the specific implementation to choose which
1102            one that is.)
1103
1104            :return: query result
1105            :rtype: :class:`QueryResult` instance
1106        """
1107        return QueryResult(self._query())
1108
1109    def queryAndConvert(self):
1110        """Macro like method: issue a query and return the converted results.
1111
1112        :return: the converted query result. See the conversion methods for more details.
1113        """
1114        res = self.query()
1115        return res.convert()
1116
1117    def __str__(self):
1118        """This method returns the string representation of a :class:`SPARQLWrapper` object.
1119
1120        .. versionadded:: 1.8.3
1121
1122        :return: A human-readable string of the object.
1123        :rtype: string
1124        """
1125        fullname = self.__module__ + "." + self.__class__.__name__
1126        items = ('"%s" : %r' % (k, v) for k, v in sorted(self.__dict__.items()))
1127        str_dict_items = "{%s}" % (',\n'.join(items))
1128        return "<%s object at 0x%016X>\n%s" % (fullname, id(self), str_dict_items)
1129
1130
1131#######################################################################################################
1132
1133
1134class QueryResult(object):
1135    """
    Wrapper around a query result. Users should not create instances of this class, it is
1137    generated by a :func:`SPARQLWrapper.query` call. The results can be
1138    converted to various formats, or used directly.
1139
1140    If used directly: the class gives access to the direct HTTP request results
1141    ``response`` obtained from the call to :func:`urllib.urlopen`.
1142    It is a file-like object with two additional methods:
1143
1144    * ``geturl()`` to return the URL of the resource retrieved
1145    * ``info()`` that returns the meta-information of the HTTP result as a dictionary-like object.
1146
1147    For convenience, these methods are also available on the :class:`QueryResult` instance.
1148
1149    The :func:`__iter__` and :func:`next` methods are also implemented (by mapping them to :attr:`response`). This means that the
1150    common idiom ``for l in obj : do_something_with_line(l)`` would work, too.
1151
    :ivar response: the direct HTTP response; a file-like object, as returned by the :func:`urllib2.urlopen` library call.
1153    :ivar requestedFormat: The requested format. The possible values are: :data:`JSON`, :data:`XML`, :data:`RDFXML`, :data:`TURTLE`, :data:`N3`, :data:`RDF`, :data:`CSV`, :data:`TSV`, :data:`JSONLD`.
1154    :type requestedFormat: string
1155
1156    """
1157    def __init__(self, result):
1158        """
1159        :param result: HTTP response stemming from a :func:`SPARQLWrapper.query` call, or a tuple with the expected format: (response, format).
1160        """
1161        if isinstance(result, tuple):
1162            self.response = result[0]
1163            self.requestedFormat = result[1]
1164        else:
1165            self.response = result
1166
1167    def geturl(self):
1168        """Return the URL of the original call.
1169
1170        :return: URL of the original call.
1171        :rtype: string
1172        """
1173        return self.response.geturl()
1174
1175    def info(self):
1176        """Return the meta-information of the HTTP result.
1177
1178        :return: meta-information of the HTTP result.
1179        :rtype: dict
1180        """
1181        return KeyCaseInsensitiveDict(self.response.info())
1182
1183    def __iter__(self):
1184        """Return an iterator object. This method is expected for the inclusion
1185        of the object in a standard ``for`` loop.
1186        """
1187        return self.response.__iter__()
1188
1189    def next(self):
1190        """Method for the standard iterator."""
1191        return self.response.next()
1192
1193    def _convertJSON(self):
1194        """
1195        Convert a JSON result into a Python dict. This method can be overwritten in a subclass
1196        for a different conversion method.
1197
1198        :return: converted result.
1199        :rtype: dict
1200        """
1201        return json.loads(self.response.read().decode("utf-8"))
1202
1203    def _convertXML(self):
1204        """
1205        Convert an XML result into a Python dom tree. This method can be overwritten in a
1206        subclass for a different conversion method.
1207
1208        :return: converted result.
1209        :rtype: :class:`xml.dom.minidom.Document`
1210        """
1211        from xml.dom.minidom import parse
1212        return parse(self.response)
1213
1214    def _convertRDF(self):
1215        """
1216        Convert a RDF/XML result into an RDFLib Graph. This method can be overwritten
1217        in a subclass for a different conversion method.
1218
1219        :return: converted result.
1220        :rtype: :class:`rdflib.graph.Graph`
1221        """
1222        try:
1223            from rdflib.graph import ConjunctiveGraph
1224        except ImportError:
1225            from rdflib import ConjunctiveGraph
1226        retval = ConjunctiveGraph()
1227        # (DEPRECATED) this is a strange hack. If the publicID is not set, rdflib (or the underlying xml parser) makes a funny
1228        # (DEPRECATED) (and, as far as I could see, meaningless) error message...
1229        retval.load(self.response) # (DEPRECATED) publicID=' ')
1230        return retval
1231
1232    def _convertN3(self):
1233        """
1234        Convert a RDF Turtle/N3 result into a string. This method can be overwritten in a subclass
1235        for a different conversion method.
1236
1237        :return: converted result.
1238        :rtype: string
1239        """
1240        return self.response.read()
1241
1242    def _convertCSV(self):
1243        """
1244        Convert a CSV result into a string. This method can be overwritten in a subclass
1245        for a different conversion method.
1246
1247        :return: converted result.
1248        :rtype: string
1249        """
1250        return self.response.read()
1251
1252    def _convertTSV(self):
1253        """
1254        Convert a TSV result into a string. This method can be overwritten in a subclass
1255        for a different conversion method.
1256
1257        :return: converted result.
1258        :rtype: string
1259        """
1260        return self.response.read()
1261
1262    def _convertJSONLD(self):
1263        """
1264        Convert a RDF JSON-LD result into an RDFLib Graph. This method can be overwritten
1265        in a subclass for a different conversion method.
1266
1267        :return: converted result
1268        :rtype: :class:`rdflib.graph.Graph`
1269        """
1270        from rdflib import ConjunctiveGraph
1271        retval = ConjunctiveGraph()
1272        retval.load(self.response, format='json-ld')# (DEPRECATED), publicID=' ')
1273        return retval
1274
1275    def convert(self):
1276        """
1277        Encode the return value depending on the return format:
1278
1279            * in the case of :data:`XML`, a DOM top element is returned
1280            * in the case of :data:`JSON`, a json conversion will return a dictionary
1281            * in the case of :data:`RDF/XML<RDFXML>`, the value is converted via RDFLib into a ``RDFLib Graph`` instance
1282            * in the case of :data:`JSON-LD<JSONLD>`, the value is converted via RDFLib into a ``RDFLib Graph`` instance
1283            * in the case of RDF :data:`Turtle<TURTLE>`/:data:`N3`, a string is returned
1284            * in the case of :data:`CSV`/:data:`TSV`, a string is returned
1285            * In all other cases the input simply returned.
1286
1287        :return: the converted query result. See the conversion methods for more details.
1288        """
1289        def _content_type_in_list(real, expected):
1290            """ Internal method for checking if the content-type header received matches any of the content types of the expected list.
1291
1292            :param real: The content-type header received.
1293            :type real: string
1294            :param expected: A list of expected content types.
1295            :type expected: list
1296            :return: Returns a boolean after checking if the content-type header received matches any of the content types of the expected list.
1297            :rtype: boolean
1298            """
1299            return True in [real.find(mime) != -1 for mime in expected]
1300
1301        def _validate_format(format_name, allowed, mime, requested):
1302            """ Internal method for validating if the requested format is one of the allowed formats.
1303
1304            :param format_name: The format name (to be used in the warning message).
1305            :type format_name: string
1306            :param allowed: A list of allowed content types.
1307            :type allowed: list
1308            :param mime: The content-type header received (to be used in the warning message).
1309            :type mime: string
1310            :param requested: the requested format.
1311            :type requested: string
1312            """
1313            if requested not in allowed:
1314                message = "Format requested was %s, but %s (%s) has been returned by the endpoint"
1315                warnings.warn(message % (requested.upper(), format_name, mime), RuntimeWarning)
1316
1317        # TODO. In order to compare properly, the requested QueryType (SPARQL Query Form) is needed. For instance, the unexpected N3 requested for a SELECT would return XML
1318        if "content-type" in self.info():
1319            ct = self.info()["content-type"] # returned Content-Type value
1320
1321            if _content_type_in_list(ct, _SPARQL_XML):
1322                _validate_format("XML", [XML], ct, self.requestedFormat)
1323                return self._convertXML()
1324            elif _content_type_in_list(ct, _XML):
1325                _validate_format("XML", [XML], ct, self.requestedFormat)
1326                return self._convertXML()
1327            elif _content_type_in_list(ct, _SPARQL_JSON):
1328                _validate_format("JSON", [JSON], ct, self.requestedFormat)
1329                return self._convertJSON()
1330            elif _content_type_in_list(ct, _RDF_XML):
1331                _validate_format("RDF/XML", [RDF, XML, RDFXML], ct, self.requestedFormat)
1332                return self._convertRDF()
1333            elif _content_type_in_list(ct, _RDF_N3):
1334                _validate_format("N3", [N3, TURTLE], ct, self.requestedFormat)
1335                return self._convertN3()
1336            elif _content_type_in_list(ct, _CSV):
1337                _validate_format("CSV", [CSV], ct, self.requestedFormat)
1338                return self._convertCSV()
1339            elif _content_type_in_list(ct, _TSV):
1340                _validate_format("TSV", [TSV], ct, self.requestedFormat)
1341                return self._convertTSV()
1342            elif _content_type_in_list(ct, _RDF_JSONLD):
1343                _validate_format("JSON(-LD)", [JSONLD, JSON], ct, self.requestedFormat)
1344                return self._convertJSONLD()
1345            else:
1346                warnings.warn("unknown response content type '%s' returning raw response..." %(ct), RuntimeWarning)
1347        return self.response.read()
1348
1349    def _get_responseFormat(self):
1350        """
1351        Get the response (return) format. The possible values are: :data:`JSON`, :data:`XML`, :data:`RDFXML`, :data:`TURTLE`, :data:`N3`, :data:`CSV`, :data:`TSV`, :data:`JSONLD`.
1352        In case there is no Content-Type, ``None`` is return. In all other cases, the raw Content-Type is return.
1353
1354        .. versionadded:: 1.8.3
1355
1356        :return: the response format. The possible values are: :data:`JSON`, :data:`XML`, :data:`RDFXML`, :data:`TURTLE`, :data:`N3`, :data:`CSV`, :data:`TSV`, :data:`JSONLD`.
1357        :rtype: string
1358        """
1359
1360        def _content_type_in_list(real, expected):
1361            """ Internal method for checking if the content-type header received matches any of the content types of the expected list.
1362
1363            :param real: The content-type header received.
1364            :type real: string
1365            :param expected: A list of expected content types.
1366            :type expected: list
1367            :return: Returns a boolean after checking if the content-type header received matches any of the content types of the expected list.
1368            :rtype: boolean
1369            """
1370            return True in [real.find(mime) != -1 for mime in expected]
1371
1372        if "content-type" in self.info():
1373            ct = self.info()["content-type"] # returned Content-Type value
1374
1375            if _content_type_in_list(ct, _SPARQL_XML):
1376                return XML
1377            elif _content_type_in_list(ct, _XML):
1378                return XML
1379            elif _content_type_in_list(ct, _SPARQL_JSON):
1380                return JSON
1381            elif _content_type_in_list(ct, _RDF_XML):
1382                return RDFXML
1383            elif _content_type_in_list(ct, _RDF_TURTLE):
1384                return TURTLE
1385            elif _content_type_in_list(ct, _RDF_N3):
1386                return N3
1387            elif _content_type_in_list(ct, _CSV):
1388                return CSV
1389            elif _content_type_in_list(ct, _TSV):
1390                return TSV
1391            elif _content_type_in_list(ct, _RDF_JSONLD):
1392                return JSONLD
1393            else:
1394                warnings.warn("Unknown response content type. Returning raw content-type ('%s')." %(ct), RuntimeWarning)
1395                return ct
1396        return None
1397
1398    def print_results(self, minWidth=None):
1399        """This method prints a representation of a :class:`QueryResult` object that MUST has as response format :data:`JSON`.
1400
1401        :param minWidth: The minimum width, counting as characters. The default value is ``None``.
1402        :type minWidth: string
1403        """
1404
1405        # Check if the requested format was JSON. If not, exit.
1406        responseFormat = self._get_responseFormat()
1407        if responseFormat != JSON:
1408            message = "Format return was %s, but JSON was expected. No printing."
1409            warnings.warn(message % (responseFormat), RuntimeWarning)
1410            return
1411
1412        results = self._convertJSON()
1413        if minWidth:
1414            width = self.__get_results_width(results, minWidth)
1415        else:
1416            width = self.__get_results_width(results)
1417        index = 0
1418        for var in results["head"]["vars"]:
1419            print ("?" + var).ljust(width[index]), "|",
1420            index += 1
1421        print
1422        print "=" * (sum(width) + 3 * len(width))
1423        for result in results["results"]["bindings"]:
1424            index = 0
1425            for var in results["head"]["vars"]:
1426                result_value = self.__get_prettyprint_string_sparql_var_result(result[var])
1427                print result_value.ljust(width[index]), "|",
1428                index += 1
1429            print
1430
1431    def __get_results_width(self, results, minWidth=2):
1432        width = []
1433        for var in results["head"]["vars"]:
1434            width.append(max(minWidth, len(var)+1))
1435        for result in results["results"]["bindings"]:
1436            index = 0
1437            for var in results["head"]["vars"]:
1438                result_value = self.__get_prettyprint_string_sparql_var_result(result[var])
1439                width[index] = max(width[index], len(result_value))
1440                index += 1
1441        return width
1442
1443    def __get_prettyprint_string_sparql_var_result(self, result):
1444        value = result["value"]
1445        lang = result.get("xml:lang", None)
1446        datatype = result.get("datatype", None)
1447        if lang is not None:
1448            value += "@"+lang
1449        if datatype is not None:
1450            value += " ["+datatype+"]"
1451        return value
1452
1453    def __str__(self):
1454        """This method returns the string representation of a :class:`QueryResult` object.
1455
1456        :return: A human-readable string of the object.
1457        :rtype: string
1458        .. versionadded:: 1.8.3
1459        """
1460        fullname = self.__module__ + "." + self.__class__.__name__
1461        str_requestedFormat = '"requestedFormat" : '+repr(self.requestedFormat)
1462        str_url = self.response.url
1463        str_code = self.response.code
1464        str_headers = self.response.info()
1465        str_response = '"response (a file-like object, as return by the urllib2.urlopen library call)" : {\n\t"url" : "%s",\n\t"code" : "%s",\n\t"headers" : %s}' % (str_url, str_code, str_headers)
1466        return "<%s object at 0x%016X>\n{%s,\n%s}" % (fullname, id(self), str_requestedFormat, str_response)
1467