1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * rasqal_format_sparql_xml.c - Read and write formats in SPARQL XML
4  *
5  * Copyright (C) 2007-2014, David Beckett http://www.dajobe.org/
6  *
7  * This package is Free Software and part of Redland http://librdf.org/
8  *
9  * It is licensed under the following three licenses as alternatives:
10  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
11  *   2. GNU General Public License (GPL) V2 or any newer version
12  *   3. Apache License, V2.0 or any newer version
13  *
14  * You may not use this file except in compliance with at least one of
15  * the above three licenses.
16  *
17  * See LICENSE.html or LICENSE.txt at the top of this package for the
18  * complete terms and further detail along with the license texts for
19  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
20  *
21  *
22  */
23 
24 
25 #ifdef HAVE_CONFIG_H
26 #include <rasqal_config.h>
27 #endif
28 
29 #ifdef WIN32
30 #include <win32_rasqal_config.h>
31 #endif
32 
33 #include <stdio.h>
34 #include <string.h>
35 #ifdef HAVE_STDLIB_H
36 #include <stdlib.h>
37 #endif
38 
39 
40 #include <raptor.h>
41 
42 /* Rasqal includes */
43 #include <rasqal.h>
44 #include <rasqal_internal.h>
45 
46 
47 static int rasqal_query_results_write_sparql_xml(rasqal_query_results_formatter* formatter, raptor_iostream *iostr, rasqal_query_results* results, raptor_uri *base_uri);
48 static rasqal_rowsource* rasqal_query_results_get_rowsource_sparql_xml(rasqal_query_results_formatter* formatter, rasqal_world *world, rasqal_variables_table* vars_table, raptor_iostream *iostr, raptor_uri *base_uri, unsigned int flags);
49 
/* XML event tracing is compiled in only for debug builds at level 2 or
 * above; in every other build TRACE_XML is guaranteed to be undefined.
 */
#if !defined(RASQAL_DEBUG) || RASQAL_DEBUG <= 1
#undef TRACE_XML
#else
#define TRACE_XML 1
#endif
55 
56 
/* Size in bytes of the iostream read buffer: the stdio BUFSIZ when it is
 * available, otherwise 1024.  A value defined earlier (for example by
 * the build) is left untouched.
 */
#if !defined(FILE_READ_BUF_SIZE)
#  if defined(BUFSIZ)
#    define FILE_READ_BUF_SIZE BUFSIZ
#  else
#    define FILE_READ_BUF_SIZE 1024
#  endif
#endif
64 
65 
66 
67 /*
68  * rasqal_query_results_write_sparql_xml:
69  * @iostr: #raptor_iostream to write the query results to
70  * @results: #rasqal_query_results query results input
71  * @base_uri: #raptor_uri base URI of the output format
72  *
73  * Write the fourth version of the SPARQL XML query results format to an
74  * iostream in a format - INTERNAL.
75  *
76  * If the writing succeeds, the query results will be exhausted.
77  *
78  * Return value: non-0 on failure
79  **/
80 static int
rasqal_query_results_write_sparql_xml(rasqal_query_results_formatter * formatter,raptor_iostream * iostr,rasqal_query_results * results,raptor_uri * base_uri)81 rasqal_query_results_write_sparql_xml(rasqal_query_results_formatter* formatter,
82                                       raptor_iostream *iostr,
83                                       rasqal_query_results* results,
84                                       raptor_uri *base_uri)
85 {
86   int rc=1;
87   rasqal_world* world = rasqal_query_results_get_world(results);
88   raptor_xml_writer* xml_writer=NULL;
89   raptor_namespace *res_ns=NULL;
90   raptor_namespace_stack *nstack=NULL;
91   raptor_xml_element *sparql_element=NULL;
92   raptor_xml_element *results_element=NULL;
93   raptor_xml_element *result_element=NULL;
94   raptor_xml_element *element1=NULL;
95   raptor_xml_element *binding_element=NULL;
96   raptor_xml_element *variable_element=NULL;
97   raptor_qname **attrs=NULL;
98   int i;
99   rasqal_query_results_type type;
100 
101   type = rasqal_query_results_get_type(results);
102 
103   if(type != RASQAL_QUERY_RESULTS_BINDINGS &&
104      type != RASQAL_QUERY_RESULTS_BOOLEAN) {
105     rasqal_log_error_simple(world, RAPTOR_LOG_LEVEL_ERROR,
106                             NULL,
107                             "Cannot write XML format v3 for %s query result format",
108                             rasqal_query_results_type_label(type));
109     return 1;
110   }
111 
112   nstack = raptor_new_namespaces(world->raptor_world_ptr, 1);
113   if(!nstack)
114     return 1;
115 
116   xml_writer = raptor_new_xml_writer(world->raptor_world_ptr,
117                                      nstack,
118                                      iostr);
119   if(!xml_writer)
120     goto tidy;
121 
122   res_ns=raptor_new_namespace(nstack,
123                               NULL,
124                               RASQAL_GOOD_CAST(const unsigned char*, "http://www.w3.org/2005/sparql-results#"),
125                               0);
126   if(!res_ns)
127     goto tidy;
128 
129   sparql_element=raptor_new_xml_element_from_namespace_local_name(res_ns,
130                                                                   RASQAL_GOOD_CAST(const unsigned char*, "sparql"),
131                                                                   NULL, base_uri);
132   if(!sparql_element)
133     goto tidy;
134 
135   raptor_xml_writer_start_element(xml_writer, sparql_element);
136   raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "\n"), 1);
137 
138   /*   <head> */
139   element1=raptor_new_xml_element_from_namespace_local_name(res_ns,
140                                                             RASQAL_GOOD_CAST(const unsigned char*, "head"),
141                                                             NULL, base_uri);
142   if(!element1)
143     goto tidy;
144 
145   raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "  "), 2);
146   raptor_xml_writer_start_element(xml_writer, element1);
147   raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "\n"), 1);
148 
149   if(rasqal_query_results_is_bindings(results)) {
150     for(i=0; 1; i++) {
151       const unsigned char *name;
152       name=rasqal_query_results_get_binding_name(results, i);
153       if(!name)
154         break;
155 
156       /*     <variable name="x"/> */
157       variable_element=raptor_new_xml_element_from_namespace_local_name(res_ns,
158                                                                         RASQAL_GOOD_CAST(const unsigned char*,"variable"),
159                                                                         NULL, base_uri);
160       if(!variable_element)
161         goto tidy;
162 
163       attrs=(raptor_qname **)raptor_alloc_memory(sizeof(raptor_qname*));
164       if(!attrs)
165         goto tidy;
166       attrs[0] = raptor_new_qname_from_namespace_local_name(world->raptor_world_ptr,
167                                                             res_ns,
168                                                             RASQAL_GOOD_CAST(const unsigned char*,"name"),
169                                                             RASQAL_GOOD_CAST(const unsigned char*, name)); /* attribute value */
170       if(!attrs[0]) {
171         raptor_free_memory((void*)attrs);
172         goto tidy;
173       }
174 
175       raptor_xml_element_set_attributes(variable_element, attrs, 1);
176 
177       raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "    "), 4);
178       raptor_xml_writer_empty_element(xml_writer, variable_element);
179       raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "\n"), 1);
180 
181       raptor_free_xml_element(variable_element);
182       variable_element=NULL;
183     }
184   }
185 
186   /* FIXME - could add <link> inside <head> */
187 
188 
189   /*   </head> */
190   raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "  "), 2);
191   raptor_xml_writer_end_element(xml_writer, element1);
192   raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "\n"), 1);
193 
194   raptor_free_xml_element(element1);
195   element1=NULL;
196 
197 
198   /* Boolean Results */
199   if(rasqal_query_results_is_boolean(results)) {
200     result_element=raptor_new_xml_element_from_namespace_local_name(res_ns,
201                                                                     RASQAL_GOOD_CAST(const unsigned char*, "boolean"),
202                                                                     NULL, base_uri);
203     if(!result_element)
204       goto tidy;
205 
206     raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "  "), 2);
207     raptor_xml_writer_start_element(xml_writer, result_element);
208     if(rasqal_query_results_get_boolean(results))
209       raptor_xml_writer_raw(xml_writer, rasqal_xsd_boolean_true);
210     else
211       raptor_xml_writer_raw(xml_writer, rasqal_xsd_boolean_false);
212     raptor_xml_writer_end_element(xml_writer, result_element);
213     raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "\n"), 1);
214 
215     goto results3done;
216   }
217 
218 
219   /* Variable Binding Results */
220 
221   /*   <results> */
222   results_element=raptor_new_xml_element_from_namespace_local_name(res_ns,
223                                                                    RASQAL_GOOD_CAST(const unsigned char*, "results"),
224                                                                    NULL, base_uri);
225   if(!results_element)
226     goto tidy;
227 
228   raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "  "), 2);
229   raptor_xml_writer_start_element(xml_writer, results_element);
230   raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "\n"), 1);
231 
232 
233   /* declare result element for later multiple use */
234   result_element=raptor_new_xml_element_from_namespace_local_name(res_ns,
235                                                                   RASQAL_GOOD_CAST(const unsigned char*, "result"),
236                                                                   NULL, base_uri);
237   if(!result_element)
238     goto tidy;
239 
240   while(!rasqal_query_results_finished(results)) {
241     /*     <result> */
242     raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "    "), 4);
243     raptor_xml_writer_start_element(xml_writer, result_element);
244     raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "\n"), 1);
245 
246     for(i=0; i<rasqal_query_results_get_bindings_count(results); i++) {
247       const unsigned char *name=rasqal_query_results_get_binding_name(results, i);
248       rasqal_literal *l=rasqal_query_results_get_binding_value(results, i);
249 
250       /*       <binding> */
251       binding_element=raptor_new_xml_element_from_namespace_local_name(res_ns,
252                                                                        RASQAL_GOOD_CAST(const unsigned char*, "binding"),
253                                                                        NULL, base_uri);
254       if(!binding_element)
255         goto tidy;
256 
257       attrs=(raptor_qname **)raptor_alloc_memory(sizeof(raptor_qname*));
258       if(!attrs)
259         goto tidy;
260       attrs[0] = raptor_new_qname_from_namespace_local_name(world->raptor_world_ptr,
261                                                             res_ns,
262                                                             RASQAL_GOOD_CAST(const unsigned char*, "name"),
263                                                             name);
264 
265       if(!attrs[0]) {
266         raptor_free_memory((void*)attrs);
267         goto tidy;
268       }
269 
270       raptor_xml_element_set_attributes(binding_element, attrs, 1);
271 
272 
273       raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "      "), 6);
274       raptor_xml_writer_start_element(xml_writer, binding_element);
275 
276       if(!l) {
277         element1=raptor_new_xml_element_from_namespace_local_name(res_ns,
278                                                                   RASQAL_GOOD_CAST(const unsigned char*, "unbound"),
279                                                                   NULL, base_uri);
280         if(!element1)
281           goto tidy;
282         raptor_xml_writer_empty_element(xml_writer, element1);
283 
284       } else switch(l->type) {
285         case RASQAL_LITERAL_URI:
286           element1=raptor_new_xml_element_from_namespace_local_name(res_ns,
287                                                                     RASQAL_GOOD_CAST(const unsigned char*, "uri"),
288                                                                     NULL, base_uri);
289           if(!element1)
290             goto tidy;
291 
292           raptor_xml_writer_start_element(xml_writer, element1);
293           raptor_xml_writer_cdata(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, raptor_uri_as_string(l->value.uri)));
294           raptor_xml_writer_end_element(xml_writer, element1);
295 
296           break;
297 
298         case RASQAL_LITERAL_BLANK:
299           element1=raptor_new_xml_element_from_namespace_local_name(res_ns,
300                                                                     RASQAL_GOOD_CAST(const unsigned char*, "bnode"),
301                                                                     NULL, base_uri);
302           if(!element1)
303             goto tidy;
304 
305           raptor_xml_writer_start_element(xml_writer, element1);
306           raptor_xml_writer_cdata(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, l->string));
307           raptor_xml_writer_end_element(xml_writer, element1);
308           break;
309 
310         case RASQAL_LITERAL_STRING:
311         case RASQAL_LITERAL_UDT:
312           element1=raptor_new_xml_element_from_namespace_local_name(res_ns,
313                                                                     RASQAL_GOOD_CAST(const unsigned char*, "literal"),
314                                                                     NULL, base_uri);
315           if(!element1)
316             goto tidy;
317 
318           if(l->language || l->datatype) {
319             attrs=(raptor_qname **)raptor_alloc_memory(sizeof(raptor_qname*));
320             if(!attrs)
321               goto tidy;
322 
323             if(l->language)
324               attrs[0]=raptor_new_qname(nstack,
325                                         RASQAL_GOOD_CAST(const unsigned char*, "xml:lang"),
326                                         RASQAL_GOOD_CAST(const unsigned char*, l->language));
327             else
328               attrs[0] = raptor_new_qname_from_namespace_local_name(world->raptor_world_ptr,
329                                                                     res_ns,
330                                                                     RASQAL_GOOD_CAST(const unsigned char*, "datatype"),
331                                                                     RASQAL_GOOD_CAST(const unsigned char*, raptor_uri_as_string(l->datatype)));
332             if(!attrs[0]) {
333               raptor_free_memory((void*)attrs);
334               goto tidy;
335             }
336 
337             raptor_xml_element_set_attributes(element1, attrs, 1);
338           }
339 
340 
341           raptor_xml_writer_start_element(xml_writer, element1);
342 
343 
344           raptor_xml_writer_cdata_counted(xml_writer,
345                                           RASQAL_GOOD_CAST(const unsigned char*, l->string),
346                                           l->string_len);
347 
348           raptor_xml_writer_end_element(xml_writer, element1);
349 
350           break;
351         case RASQAL_LITERAL_PATTERN:
352         case RASQAL_LITERAL_QNAME:
353         case RASQAL_LITERAL_INTEGER:
354         case RASQAL_LITERAL_XSD_STRING:
355         case RASQAL_LITERAL_BOOLEAN:
356         case RASQAL_LITERAL_DOUBLE:
357         case RASQAL_LITERAL_FLOAT:
358         case RASQAL_LITERAL_VARIABLE:
359         case RASQAL_LITERAL_DECIMAL:
360         case RASQAL_LITERAL_DATE:
361         case RASQAL_LITERAL_DATETIME:
362         case RASQAL_LITERAL_INTEGER_SUBTYPE:
363 
364         case RASQAL_LITERAL_UNKNOWN:
365         default:
366           rasqal_log_error_simple(world, RAPTOR_LOG_LEVEL_ERROR,
367                                   NULL,
368                                   "Cannot turn literal type %u into XML",
369                                   l->type);
370           goto tidy;
371         }
372 
373       if(element1) {
374         raptor_free_xml_element(element1);
375         element1=NULL;
376       }
377 
378       /*       </binding> */
379       raptor_xml_writer_end_element(xml_writer, binding_element);
380       raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "\n"), 1);
381 
382       raptor_free_xml_element(binding_element);
383       binding_element=NULL;
384     }
385 
386     raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "    "), 4);
387     raptor_xml_writer_end_element(xml_writer, result_element);
388     raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "\n"), 1);
389 
390     rasqal_query_results_next(results);
391   }
392 
393   raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "  "), 2);
394   raptor_xml_writer_end_element(xml_writer, results_element);
395   raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "\n"), 1);
396 
397   results3done:
398 
399   rc=0;
400 
401   raptor_xml_writer_end_element(xml_writer, sparql_element);
402   raptor_xml_writer_raw_counted(xml_writer, RASQAL_GOOD_CAST(const unsigned char*, "\n"), 1);
403 
404   tidy:
405   if(element1)
406     raptor_free_xml_element(element1);
407   if(variable_element)
408     raptor_free_xml_element(variable_element);
409   if(binding_element)
410     raptor_free_xml_element(binding_element);
411   if(result_element)
412     raptor_free_xml_element(result_element);
413   if(results_element)
414     raptor_free_xml_element(results_element);
415   if(sparql_element)
416     raptor_free_xml_element(sparql_element);
417   if(res_ns)
418     raptor_free_namespace(res_ns);
419   if(xml_writer)
420     raptor_free_xml_writer(xml_writer);
421   if(nstack)
422     raptor_free_namespaces(nstack);
423 
424   return rc;
425 }
426 
427 
/* Local names of the elements recognised by the reader.  The position of
 * a name in this table is the numeric value of the matching
 * rasqal_sparql_xml_read_state constant, so the two lists must be kept
 * in step.  Slot 0 is a placeholder for STATE_unknown and the table is
 * NULL terminated.
 */
const char* const sparql_xml_element_names[] = {
  "!",         /* 0: placeholder, never matched as a state */
  "sparql",    /* 1 */
  "head",      /* 2 */
  "boolean",   /* 3 */
  "binding",   /* 4 */
  "variable",  /* 5 */
  "results",   /* 6 */
  "result",    /* 7 */
  "bnode",     /* 8 */
  "literal",   /* 9 */
  "uri",       /* 10 */
  NULL
};
444 
445 
/* Reader state: which known element was most recently opened or closed.
 * Each value doubles as an index into sparql_xml_element_names, so the
 * numbering below must match the order of that table.
 */
typedef enum
{
  STATE_unknown  = 0,
  STATE_sparql   = 1,
  STATE_head     = 2,
  STATE_boolean  = 3,
  STATE_binding  = 4,
  STATE_variable = 5,
  STATE_results  = 6,
  STATE_result   = 7,
  STATE_bnode    = 8,
  STATE_literal  = 9,
  STATE_uri      = 10,
  /* inclusive bounds used when scanning the name table */
  STATE_first = STATE_sparql,
  STATE_last = STATE_uri
} rasqal_sparql_xml_read_state;
463 
464 
465 typedef struct
466 {
467   rasqal_world* world;
468   rasqal_rowsource* rowsource;
469 
470   int failed;
471 #ifdef TRACE_XML
472   int trace;
473 #endif
474 
475   /* Input fields */
476   raptor_uri* base_uri;
477   raptor_iostream* iostr;
478 
479   /* SAX2 fields */
480   raptor_sax2* sax2;
481   raptor_locator locator;
482   int depth; /* element depth */
483 
484   /* SPARQL XML Results parsing */
485   rasqal_sparql_xml_read_state state; /* state */
486   /* state-based fields for turning XML into rasqal literals, rows */
487   const char* name;  /* variable name (from binding/@name) */
488   size_t name_length;
489 
490   raptor_stringbuffer *sb; /* URI string, literal string or blank node ID */
491 
492   const char* datatype; /* literal datatype URI string from literal/@datatype */
493   const char* language; /* literal language from literal/@xml:lang */
494   rasqal_row* row; /* current result row */
495   int offset; /* current result row number */
496   int result_offset; /* current <result> column number */
497   unsigned char buffer[FILE_READ_BUF_SIZE]; /* iostream read buffer */
498 
499   /* Output fields */
500   raptor_sequence* results_sequence; /* saved result rows */
501 
502   /* Variables table allocated for variables in the result set */
503   rasqal_variables_table* vars_table;
504   int variables_count;
505 
506   unsigned int flags;
507 
508   int boolean_value;
509 } rasqal_rowsource_sparql_xml_context;
510 
511 
512 static void rasqal_sparql_xml_free_context(rasqal_rowsource_sparql_xml_context* con);
513 
514 
515 #ifdef TRACE_XML
/* Indent trace output: write two spaces to @fh for each level of @depth */
static void
pad(FILE* fh, int depth)
{
  while(depth-- > 0)
    fputs("  ", fh);
}
523 #endif
524 
525 static int
rasqal_sparql_xml_start(rasqal_rowsource_sparql_xml_context * con)526 rasqal_sparql_xml_start(rasqal_rowsource_sparql_xml_context* con)
527 {
528   con->state = STATE_unknown;
529 
530 #ifdef TRACE_XML
531   con->trace = 1;
532 #endif
533   con->depth = 0;
534 
535   raptor_sax2_parse_start(con->sax2, con->base_uri);
536 
537   return 0;
538 }
539 
540 
541 static void
rasqal_sparql_xml_sax2_start_element_handler(void * user_data,raptor_xml_element * xml_element)542 rasqal_sparql_xml_sax2_start_element_handler(void *user_data,
543                                              raptor_xml_element *xml_element)
544 {
545   rasqal_rowsource_sparql_xml_context* con;
546   int i;
547   raptor_qname* name;
548   rasqal_sparql_xml_read_state state=STATE_unknown;
549   int attr_count;
550 
551   con=(rasqal_rowsource_sparql_xml_context*)user_data;
552 
553   name=raptor_xml_element_get_name(xml_element);
554 
555   for(i=STATE_first; i <= STATE_last; i++) {
556     if(!strcmp(RASQAL_GOOD_CAST(const char*, raptor_qname_get_local_name(name)),
557                sparql_xml_element_names[i])) {
558       state=(rasqal_sparql_xml_read_state)i;
559       con->state=state;
560     }
561   }
562 
563   if(state == STATE_unknown) {
564     fprintf(stderr, "UNKNOWN element %s\n", raptor_qname_get_local_name(name));
565     con->failed++;
566   }
567 
568 #ifdef TRACE_XML
569   if(con->trace) {
570     pad(stderr, con->depth);
571     fprintf(stderr, "Element %s (%d)\n", raptor_qname_get_local_name(name),
572             state);
573   }
574 #endif
575 
576   attr_count=raptor_xml_element_get_attributes_count(xml_element);
577   con->name=NULL;
578   con->sb = raptor_new_stringbuffer();
579   con->datatype=NULL;
580   con->language=NULL;
581 
582   if(attr_count > 0) {
583     raptor_qname** attrs=raptor_xml_element_get_attributes(xml_element);
584     for(i = 0; i < attr_count; i++) {
585       const char* local_name = RASQAL_GOOD_CAST(const char*, raptor_qname_get_local_name(attrs[i]));
586 #ifdef TRACE_XML
587       if(con->trace) {
588         pad(stderr, con->depth+1);
589         fprintf(stderr, "Attribute %s='%s'\n",
590                 local_name, raptor_qname_get_value(attrs[i]));
591       }
592 #endif
593 
594       if(!strcmp(local_name, "name"))
595         con->name = RASQAL_GOOD_CAST(const char*, raptor_qname_get_counted_value(attrs[i], &con->name_length));
596       else if(!strcmp(local_name, "datatype"))
597         con->datatype = RASQAL_GOOD_CAST(const char*, raptor_qname_get_value(attrs[i]));
598     }
599   }
600   if(raptor_xml_element_get_language(xml_element)) {
601     con->language = RASQAL_GOOD_CAST(const char*, raptor_xml_element_get_language(xml_element));
602 #ifdef TRACE_XML
603     if(con->trace) {
604       pad(stderr, con->depth+1);
605       fprintf(stderr, "xml:lang '%s'\n", con->language);
606     }
607 #endif
608   }
609 
610   switch(state) {
611     case STATE_variable:
612       if(con->name) {
613         rasqal_variable *v;
614         v = rasqal_variables_table_add2(con->vars_table,
615                                         RASQAL_VARIABLE_TYPE_NORMAL,
616                                         RASQAL_GOOD_CAST(const unsigned char*, con->name),
617                                         con->name_length, NULL);
618         if(v) {
619           rasqal_rowsource_add_variable(con->rowsource, v);
620           /* above function takes a reference to v */
621           rasqal_free_variable(v);
622         }
623       }
624       break;
625 
626     case STATE_result:
627       if(1) {
628         con->row = rasqal_new_row(con->rowsource);
629         RASQAL_DEBUG2("Made new row %d\n", con->offset);
630         con->offset++;
631       }
632       break;
633 
634     case STATE_binding:
635       con->result_offset = rasqal_rowsource_get_variable_offset_by_name(con->rowsource, RASQAL_GOOD_CAST(const unsigned char*, con->name));
636       break;
637 
638     case STATE_sparql:
639     case STATE_head:
640     case STATE_boolean:
641     case STATE_results:
642     case STATE_literal:
643     case STATE_bnode:
644     case STATE_uri:
645     case STATE_unknown:
646     default:
647       break;
648   }
649 
650   con->depth++;
651 }
652 
653 
654 static void
rasqal_sparql_xml_sax2_characters_handler(void * user_data,raptor_xml_element * xml_element,const unsigned char * s,int len)655 rasqal_sparql_xml_sax2_characters_handler(void *user_data,
656                                           raptor_xml_element* xml_element,
657                                           const unsigned char *s, int len)
658 {
659   rasqal_rowsource_sparql_xml_context* con;
660   con=(rasqal_rowsource_sparql_xml_context*)user_data;
661 
662 #ifdef TRACE_XML
663   if(con->trace) {
664     pad(stderr, con->depth);
665     fputs("Text '", stderr);
666     fwrite(s, sizeof(char), len, stderr);
667     fprintf(stderr, "' (%d bytes)\n", len);
668   }
669 #endif
670 
671   if(con->state == STATE_literal ||
672      con->state == STATE_uri ||
673      con->state == STATE_bnode ||
674      con->state == STATE_boolean ) {
675     raptor_stringbuffer_append_counted_string(con->sb, s, RASQAL_BAD_CAST(size_t, len), 1);
676   }
677 }
678 
679 
680 static void
rasqal_sparql_xml_sax2_end_element_handler(void * user_data,raptor_xml_element * xml_element)681 rasqal_sparql_xml_sax2_end_element_handler(void *user_data,
682                                            raptor_xml_element* xml_element)
683 {
684   rasqal_rowsource_sparql_xml_context* con;
685   raptor_qname* name;
686   int i;
687   rasqal_sparql_xml_read_state state=STATE_unknown;
688   const char* value = NULL;
689   size_t value_len = 0;
690 
691   con=(rasqal_rowsource_sparql_xml_context*)user_data;
692 
693   name=raptor_xml_element_get_name(xml_element);
694 
695   if(con->sb) {
696     value_len = raptor_stringbuffer_length(con->sb);
697     value = RASQAL_GOOD_CAST(const char*, raptor_stringbuffer_as_string(con->sb));
698   }
699 
700   for(i=STATE_first; i <= STATE_last; i++) {
701     if(!strcmp(RASQAL_GOOD_CAST(const char*, raptor_qname_get_local_name(name)),
702                sparql_xml_element_names[i])) {
703       state=(rasqal_sparql_xml_read_state)i;
704       con->state=state;
705     }
706   }
707 
708   if(state == STATE_unknown) {
709     fprintf(stderr, "UNKNOWN element %s\n", raptor_qname_get_local_name(name));
710     con->failed++;
711   }
712 
713   con->depth--;
714 #ifdef TRACE_XML
715   if(con->trace) {
716     pad(stderr, con->depth);
717     fprintf(stderr, "End Element %s (%d)\n", raptor_qname_get_local_name(name),
718             con->state);
719   }
720 #endif
721 
722   switch(con->state) {
723     case STATE_head:
724       if(con->vars_table) {
725         /* Only now is the full number of variables correct in
726          * con->rowsource->size */
727         con->variables_count = con->rowsource->size;
728       }
729       break;
730 
731     case STATE_boolean:
732       if(1) {
733         con->boolean_value = -1;
734         if(value_len == 4 && !strncmp(value, "true", value_len))
735           con->boolean_value = 1;
736         else if(value_len == 5 && !strncmp(value, "false", value_len))
737           con->boolean_value = 0;
738         RASQAL_DEBUG3("boolean result string '%s' value %d\n", value, con->boolean_value);
739       }
740       break;
741 
742     case STATE_literal:
743       if(1) {
744         rasqal_literal* l;
745         unsigned char* lvalue;
746         raptor_uri* datatype_uri=NULL;
747         char* language_str=NULL;
748 
749         lvalue = RASQAL_MALLOC(unsigned char*, value_len + 1);
750         if(!value_len)
751           *lvalue = '\0';
752         else
753           memcpy(lvalue, value, value_len + 1);
754         if(con->datatype)
755           datatype_uri = raptor_new_uri(con->world->raptor_world_ptr, RASQAL_GOOD_CAST(const unsigned char*, con->datatype));
756         if(con->language) {
757           size_t language_len = strlen(con->language);
758           language_str = RASQAL_MALLOC(char*, language_len + 1);
759           memcpy(language_str, con->language, language_len + 1);
760         }
761         l = rasqal_new_string_literal_node(con->world, lvalue, language_str,
762                                            datatype_uri);
763         rasqal_row_set_value_at(con->row, con->result_offset, l);
764         rasqal_free_literal(l);
765         RASQAL_DEBUG3("Saving row result %d string value at offset %d\n",
766                       con->offset, con->result_offset);
767       }
768       break;
769 
770     case STATE_bnode:
771       if(1) {
772         rasqal_literal* l;
773         unsigned char* lvalue;
774         lvalue = RASQAL_MALLOC(unsigned char*, value_len + 1);
775         memcpy(lvalue, value, value_len + 1);
776         l = rasqal_new_simple_literal(con->world, RASQAL_LITERAL_BLANK, lvalue);
777         rasqal_row_set_value_at(con->row, con->result_offset, l);
778         rasqal_free_literal(l);
779         RASQAL_DEBUG3("Saving row result %d bnode value at offset %d\n",
780                       con->offset, con->result_offset);
781       }
782       break;
783 
784     case STATE_uri:
785       if(1) {
786         raptor_uri* uri;
787         rasqal_literal* l;
788         uri = raptor_new_uri(con->world->raptor_world_ptr, RASQAL_GOOD_CAST(const unsigned char*, value));
789         l = rasqal_new_uri_literal(con->world, uri);
790         rasqal_row_set_value_at(con->row, con->result_offset, l);
791         rasqal_free_literal(l);
792         RASQAL_DEBUG3("Saving row result %d uri value at offset %d\n",
793                       con->offset, con->result_offset);
794       }
795       break;
796 
797     case STATE_result:
798       if(con->row) {
799         RASQAL_DEBUG2("Saving row result %d\n", con->offset);
800         con->row->offset = con->offset - 1;
801         raptor_sequence_push(con->results_sequence, con->row);
802       }
803       con->row = NULL;
804       break;
805 
806     case STATE_unknown:
807     case STATE_sparql:
808     case STATE_variable:
809     case STATE_results:
810     case STATE_binding:
811     default:
812       break;
813   }
814 
815   if(con->sb) {
816     raptor_free_stringbuffer(con->sb);
817     con->sb = raptor_new_stringbuffer();
818   }
819 }
820 
821 
822 /* Local handlers for turning sparql XML read from an iostream into rows */
823 
824 static int
rasqal_rowsource_sparql_xml_init(rasqal_rowsource * rowsource,void * user_data)825 rasqal_rowsource_sparql_xml_init(rasqal_rowsource* rowsource, void *user_data)
826 {
827   rasqal_rowsource_sparql_xml_context* con;
828 
829   con = (rasqal_rowsource_sparql_xml_context*)user_data;
830 
831   con->rowsource = rowsource;
832 
833   return rasqal_sparql_xml_start(con);
834 }
835 
836 
837 static int
rasqal_rowsource_sparql_xml_finish(rasqal_rowsource * rowsource,void * user_data)838 rasqal_rowsource_sparql_xml_finish(rasqal_rowsource* rowsource, void *user_data)
839 {
840   rasqal_rowsource_sparql_xml_context* con;
841 
842   con = (rasqal_rowsource_sparql_xml_context*)user_data;
843 
844   rasqal_sparql_xml_free_context(con);
845 
846   return 0;
847 }
848 
849 
850 static void
rasqal_rowsource_sparql_xml_process(rasqal_rowsource_sparql_xml_context * con)851 rasqal_rowsource_sparql_xml_process(rasqal_rowsource_sparql_xml_context* con)
852 {
853   if(raptor_sequence_size(con->results_sequence) && con->variables_count > 0)
854     return;
855 
856   /* do some parsing - need some results */
857   while(!raptor_iostream_read_eof(con->iostr)) {
858     size_t read_len;
859 
860     read_len = RASQAL_BAD_CAST(size_t,
861                                raptor_iostream_read_bytes(RASQAL_GOOD_CAST(char*, con->buffer), 1,
862                                                           FILE_READ_BUF_SIZE,
863                                                           con->iostr));
864     if(read_len > 0) {
865 #ifdef TRACE_XML
866       RASQAL_DEBUG2("processing %d bytes\n", RASQAL_GOOD_CAST(int, read_len));
867 #endif
868       raptor_sax2_parse_chunk(con->sax2, con->buffer, read_len, 0);
869     }
870 
871     if(read_len < FILE_READ_BUF_SIZE) {
872       /* finished */
873       raptor_sax2_parse_chunk(con->sax2, NULL, 0, 1);
874       break;
875     }
876 
877     /* end with variables sequence done AND at least one row */
878     if(con->variables_count > 0 &&
879        raptor_sequence_size(con->results_sequence) > 0)
880       break;
881   }
882 
883 }
884 
885 
886 static int
rasqal_rowsource_sparql_xml_ensure_variables(rasqal_rowsource * rowsource,void * user_data)887 rasqal_rowsource_sparql_xml_ensure_variables(rasqal_rowsource* rowsource,
888                                              void *user_data)
889 {
890   rasqal_rowsource_sparql_xml_context* con;
891 
892   con = (rasqal_rowsource_sparql_xml_context*)user_data;
893 
894   rasqal_rowsource_sparql_xml_process(con);
895 
896   return con->failed;
897 }
898 
899 
900 static rasqal_row*
rasqal_rowsource_sparql_xml_read_row(rasqal_rowsource * rowsource,void * user_data)901 rasqal_rowsource_sparql_xml_read_row(rasqal_rowsource* rowsource,
902                                      void *user_data)
903 {
904   rasqal_rowsource_sparql_xml_context* con;
905   rasqal_row* row=NULL;
906 
907   con=(rasqal_rowsource_sparql_xml_context*)user_data;
908 
909   rasqal_rowsource_sparql_xml_process(con);
910 
911   if(!con->failed && raptor_sequence_size(con->results_sequence) > 0) {
912 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
913     RASQAL_DEBUG1("getting row from stored sequence\n");
914 #endif
915     row=(rasqal_row*)raptor_sequence_unshift(con->results_sequence);
916   }
917 
918   return row;
919 }
920 
921 
922 /*
923  * rasqal_sparql_xml_init_context:
924  * @world: rasqal world object
925  * @iostr: #raptor_iostream to read the query results from
926  * @base_uri: #raptor_uri base URI of the input format
927  * @flags: flags
928  *
929  * INTERNAL - Initialise the SPARQL XML context
930  *
931  * Return value: context or NULL on failure
932  **/
933 static rasqal_rowsource_sparql_xml_context*
rasqal_sparql_xml_init_context(rasqal_world * world,raptor_iostream * iostr,raptor_uri * base_uri,unsigned int flags)934 rasqal_sparql_xml_init_context(rasqal_world *world,
935                                raptor_iostream *iostr,
936                                raptor_uri *base_uri,
937                                unsigned int flags)
938 {
939   rasqal_rowsource_sparql_xml_context* con;
940 
941   con = RASQAL_CALLOC(rasqal_rowsource_sparql_xml_context*, 1, sizeof(*con));
942   if(!con)
943     return NULL;
944 
945   con->world = world;
946   con->base_uri = base_uri ? raptor_uri_copy(base_uri) : NULL;
947   con->iostr = iostr;
948 
949   con->locator.uri = base_uri;
950 
951   con->sax2 = raptor_new_sax2(world->raptor_world_ptr, &con->locator, con);
952   if(!con->sax2)
953     return NULL;
954 
955   con->flags = flags;
956 
957   raptor_sax2_set_start_element_handler(con->sax2,
958                                         rasqal_sparql_xml_sax2_start_element_handler);
959   raptor_sax2_set_characters_handler(con->sax2,
960                                      rasqal_sparql_xml_sax2_characters_handler);
961 
962   raptor_sax2_set_end_element_handler(con->sax2,
963                                       rasqal_sparql_xml_sax2_end_element_handler);
964 
965   return con;
966 }
967 
968 
969 
970 /*
971  * rasqal_sparql_xml_free_context:
972  * @con: SPARQL xml context
973  *
974  * INTERNAL - Free the SPARQL XML context
975  **/
976 static void
rasqal_sparql_xml_free_context(rasqal_rowsource_sparql_xml_context * con)977 rasqal_sparql_xml_free_context(rasqal_rowsource_sparql_xml_context* con)
978 {
979 
980   if(con->base_uri)
981     raptor_free_uri(con->base_uri);
982 
983   if(con->sax2)
984     raptor_free_sax2(con->sax2);
985 
986   if(con->results_sequence)
987     raptor_free_sequence(con->results_sequence);
988 
989   if(con->vars_table)
990     rasqal_free_variables_table(con->vars_table);
991 
992   if(con->flags) {
993     if(con->iostr)
994       raptor_free_iostream(con->iostr);
995   }
996 
997   if(con->sb)
998     raptor_free_stringbuffer(con->sb);
999 
1000   RASQAL_FREE(rasqal_rowsource_sparql_xml_context, con);
1001 }
1002 
1003 
1004 static int
rasqal_rowsource_sparql_xml_get_boolean(rasqal_query_results_formatter * formatter,rasqal_world * world,raptor_iostream * iostr,raptor_uri * base_uri,unsigned int flags)1005 rasqal_rowsource_sparql_xml_get_boolean(rasqal_query_results_formatter *formatter,
1006                                         rasqal_world* world, raptor_iostream *iostr,
1007                                         raptor_uri *base_uri, unsigned int flags)
1008 {
1009   rasqal_rowsource_sparql_xml_context* con;
1010   int bv;
1011 
1012   con = rasqal_sparql_xml_init_context(world, iostr, base_uri, flags);
1013   if(!con)
1014     return -1;
1015 
1016   rasqal_sparql_xml_start(con);
1017 
1018   /* do some parsing - until we get the boolean value */
1019   while(!raptor_iostream_read_eof(con->iostr)) {
1020     size_t read_len;
1021 
1022     read_len = RASQAL_BAD_CAST(size_t,
1023                                raptor_iostream_read_bytes(RASQAL_GOOD_CAST(char*, con->buffer), 1,
1024                                                           FILE_READ_BUF_SIZE,
1025                                                           con->iostr));
1026     if(read_len > 0) {
1027 #ifdef TRACE_XML
1028       RASQAL_DEBUG2("processing %d bytes\n", RASQAL_GOOD_CAST(int, read_len));
1029 #endif
1030       raptor_sax2_parse_chunk(con->sax2, con->buffer, read_len, 0);
1031     }
1032 
1033     if(read_len < FILE_READ_BUF_SIZE) {
1034       /* finished */
1035       raptor_sax2_parse_chunk(con->sax2, NULL, 0, 1);
1036       break;
1037     }
1038 
1039     /* end with any boolean value */
1040     if(con->boolean_value >= 0)
1041       break;
1042   }
1043 
1044   bv = con->boolean_value;
1045 
1046   rasqal_sparql_xml_free_context(con);
1047 
1048   return bv;
1049 }
1050 
1051 
/*
 * Rowsource handler table for the SPARQL XML reader.  It is handed to
 * rasqal_new_rowsource_from_handler() by
 * rasqal_query_results_get_rowsource_sparql_xml().  Entries set to NULL
 * are callbacks this reader does not supply.
 */
static const rasqal_rowsource_handler rasqal_rowsource_sparql_xml_handler={
  /* .version = */ 1,
  /* label string - presumably the .name field; confirm against the
   * rasqal_rowsource_handler declaration */
  "SPARQL XML",
  /* .init = */ rasqal_rowsource_sparql_xml_init,
  /* .finish = */ rasqal_rowsource_sparql_xml_finish,
  /* .ensure_variables = */ rasqal_rowsource_sparql_xml_ensure_variables,
  /* .read_row = */ rasqal_rowsource_sparql_xml_read_row,
  /* .read_all_rows = */ NULL,
  /* .reset = */ NULL,
  /* .set_requirements = */ NULL,
  /* .get_inner_rowsource = */ NULL,
  /* .set_origin = */ NULL,
};
1065 
1066 
1067 
1068 /*
1069  * rasqal_query_results_getrowsource_sparql_xml:
1070  * @world: rasqal world object
1071  * @iostr: #raptor_iostream to read the query results from
1072  * @base_uri: #raptor_uri base URI of the input format
1073  *
1074  * INTERNAL - Read the fourth version of the SPARQL XML query results
1075  * format from an iostream in a format returning a rowsource.
1076  *
1077  * Return value: a new rasqal_rowsource or NULL on failure
1078  **/
1079 static rasqal_rowsource*
rasqal_query_results_get_rowsource_sparql_xml(rasqal_query_results_formatter * formatter,rasqal_world * world,rasqal_variables_table * vars_table,raptor_iostream * iostr,raptor_uri * base_uri,unsigned int flags)1080 rasqal_query_results_get_rowsource_sparql_xml(rasqal_query_results_formatter* formatter,
1081                                               rasqal_world *world,
1082                                               rasqal_variables_table* vars_table,
1083                                               raptor_iostream *iostr,
1084                                               raptor_uri *base_uri,
1085                                               unsigned int flags)
1086 {
1087   rasqal_rowsource_sparql_xml_context* con;
1088 
1089   con = rasqal_sparql_xml_init_context(world, iostr, base_uri, flags);
1090   if(!con)
1091     return NULL;
1092 
1093   con->results_sequence = raptor_new_sequence((raptor_data_free_handler)rasqal_free_row, (raptor_data_print_handler)rasqal_row_print);
1094 
1095   con->vars_table = rasqal_new_variables_table_from_variables_table(vars_table);
1096 
1097   return rasqal_new_rowsource_from_handler(world, NULL,
1098                                            con,
1099                                            &rasqal_rowsource_sparql_xml_handler,
1100                                            con->vars_table,
1101                                            0);
1102 }
1103 
1104 
1105 
1106 static int
rasqal_query_results_xml_recognise_syntax(rasqal_query_results_format_factory * factory,const unsigned char * buffer,size_t len,const unsigned char * identifier,const unsigned char * suffix,const char * mime_type)1107 rasqal_query_results_xml_recognise_syntax(rasqal_query_results_format_factory* factory,
1108                                           const unsigned char *buffer,
1109                                           size_t len,
1110                                           const unsigned char *identifier,
1111                                           const unsigned char *suffix,
1112 const char *mime_type)
1113 {
1114 
1115   if(suffix && !strcmp(RASQAL_GOOD_CAST(const char*, suffix), "srx"))
1116     return 8;
1117 
1118   return 0;
1119 }
1120 
1121 
/* NULL-terminated list of format names; installed as factory->desc.names
 * by rasqal_query_results_sparql_xml_register_factory() */
static const char* const sparql_xml_names[] = { "xml", NULL};

/* NULL-terminated list of URIs for this format; installed as
 * factory->desc.uri_strings */
static const char* const sparql_xml_uri_strings[] = {
  "http://www.w3.org/ns/formats/SPARQL_Results_XML",
  "http://www.w3.org/TR/rdf-sparql-XMLres/",
  "http://www.w3.org/TR/2008/REC-rdf-sparql-XMLres-20080115/",
  "http://www.w3.org/2005/sparql-results#",
  NULL
};

/* MIME type table ending with a { NULL, 0, 0 } entry; installed as
 * factory->desc.mime_types.
 * NOTE(review): 30 matches the length of the MIME type string and 10
 * looks like a q (preference) value - confirm against raptor_type_q.
 */
static const raptor_type_q sparql_xml_types[] = {
  { "application/sparql-results+xml", 30, 10},
  { NULL, 0, 0}
};
1136 
1137 static int
rasqal_query_results_sparql_xml_register_factory(rasqal_query_results_format_factory * factory)1138 rasqal_query_results_sparql_xml_register_factory(rasqal_query_results_format_factory *factory)
1139 {
1140   int rc = 0;
1141 
1142   factory->desc.names = sparql_xml_names;
1143   factory->desc.mime_types = sparql_xml_types;
1144 
1145   factory->desc.label = "SPARQL XML Query Results";
1146   factory->desc.uri_strings = sparql_xml_uri_strings;
1147 
1148   factory->desc.flags = 0;
1149 
1150   factory->write         = rasqal_query_results_write_sparql_xml;
1151   factory->get_rowsource = rasqal_query_results_get_rowsource_sparql_xml;
1152   factory->recognise_syntax = rasqal_query_results_xml_recognise_syntax;
1153   factory->get_boolean      = rasqal_rowsource_sparql_xml_get_boolean;
1154 
1155   return rc;
1156 }
1157 
1158 
1159 int
rasqal_init_result_format_sparql_xml(rasqal_world * world)1160 rasqal_init_result_format_sparql_xml(rasqal_world* world)
1161 {
1162   return !rasqal_world_register_query_results_format_factory(world,
1163                                                              &rasqal_query_results_sparql_xml_register_factory);
1164 }
1165