1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * rasqal_service.c - Rasqal SPARQL Protocol Service
4  *
5  * Copyright (C) 2010, David Beckett http://www.dajobe.org/
6  *
7  * This package is Free Software and part of Redland http://librdf.org/
8  *
9  * It is licensed under the following three licenses as alternatives:
10  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
11  *   2. GNU General Public License (GPL) V2 or any newer version
12  *   3. Apache License, V2.0 or any newer version
13  *
14  * You may not use this file except in compliance with at least one of
15  * the above three licenses.
16  *
17  * See LICENSE.html or LICENSE.txt at the top of this package for the
18  * complete terms and further detail along with the license texts for
19  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
20  *
21  *
22  */
23 
24 #ifdef HAVE_CONFIG_H
25 #include <rasqal_config.h>
26 #endif
27 
28 #ifdef WIN32
29 #include <win32_rasqal_config.h>
30 #endif
31 
32 #include <stdio.h>
33 #include <string.h>
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_UNISTD_H
38 #include <unistd.h>
39 #endif
40 #include <stdarg.h>
41 
42 #include "rasqal.h"
43 #include "rasqal_internal.h"
44 
45 
46 #define DEFAULT_FORMAT "application/sparql-results+xml"
47 
48 
/*
 * State for one SPARQL protocol service: the request to send, the WWW
 * object used to send it and the pieces of the response gathered while
 * a fetch is running.
 */
struct rasqal_service_s
{
  /* world the service was created in; not freed by the service */
  rasqal_world* world;

  /* request fields */
  raptor_uri* service_uri; /* copy of the endpoint URI given to the constructor */
  char* query_string; /* private NUL-terminated copy of the query, or NULL */
  size_t query_string_len; /* strlen() of query_string; 0 when there is no query */
  raptor_sequence* data_graphs; /* background graph and named graphs */
  char* format; /* MIME Type to use as request HTTP Accept: (NULL selects DEFAULT_FORMAT) */

  /* URL retrieval fields */
  raptor_www* www; /* freed by the service; created on first execute if unset */
  int started; /* reset to 0 before a fetch, set to 1 when the first chunk arrives */

  /* response fields */
  raptor_uri* final_uri; /* URI reported by raptor_www_get_final_uri() at the first chunk */
  raptor_stringbuffer* sb; /* response body accumulated by the write-bytes handler */
  char* content_type; /* response content type, cut at the first ';' or ' ' */

  /* reference count: 1 after construction, destroyed when it drops to 0 */
  int usage;
};
71 
72 
73 
74 /**
75  * rasqal_new_service:
76  * @world: rasqal_world object
77  * @service_uri: sparql protocol service URI
78  * @query_string: query string (or NULL)
79  * @data_graphs: sequence of #rasqal_data_graph graphs for service
80  *
81  * Constructor - create a new rasqal protocol service object.
82  *
83  * Create a structure to execute a sparql protocol service at
84  * @service_uri running the query @query_string and returning
85  * a sparql result set.
86  *
87  * All arguments are copied by the service object
88  *
89  * Return value: a new #rasqal_query object or NULL on failure
90  */
91 rasqal_service*
rasqal_new_service(rasqal_world * world,raptor_uri * service_uri,const unsigned char * query_string,raptor_sequence * data_graphs)92 rasqal_new_service(rasqal_world* world, raptor_uri* service_uri,
93                    const unsigned char* query_string,
94                    raptor_sequence* data_graphs)
95 {
96   rasqal_service* svc;
97   size_t len = 0;
98 
99   RASQAL_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, rasqal_world, NULL);
100   RASQAL_ASSERT_OBJECT_POINTER_RETURN_VALUE(service_uri, raptor_uri, NULL);
101 
102   svc = RASQAL_CALLOC(rasqal_service*, 1, sizeof(*svc));
103   if(!svc)
104     return NULL;
105 
106   svc->usage = 1;
107   svc->world = world;
108   svc->service_uri = raptor_uri_copy(service_uri);
109 
110   if(query_string) {
111     len = strlen(RASQAL_GOOD_CAST(const char*, query_string));
112     svc->query_string = RASQAL_MALLOC(char*, len + 1);
113     if(!svc->query_string) {
114       rasqal_free_service(svc);
115       return NULL;
116     }
117 
118     memcpy(svc->query_string, query_string, len + 1);
119   }
120   svc->query_string_len = len;
121 
122   if(data_graphs) {
123     int i;
124     rasqal_data_graph* dg;
125 
126     svc->data_graphs = raptor_new_sequence((raptor_data_free_handler)rasqal_free_data_graph,
127                                            NULL);
128 
129     if(!svc->data_graphs) {
130       rasqal_free_service(svc);
131       return NULL;
132     }
133 
134     for(i = 0;
135         (dg = (rasqal_data_graph*)raptor_sequence_get_at(data_graphs, i));
136         i++) {
137       raptor_sequence_push(svc->data_graphs,
138                            rasqal_new_data_graph_from_data_graph(dg));
139     }
140   }
141 
142   return svc;
143 }
144 
145 
146 /*
147  * rasqal_new_service_from_service:
148  * @service: #rasqal_service to copy
149  *
150  * INTERNAL - Copy Constructor - Create a new Rasqal service from an existing one
151  *
152  * This adds a new reference to the service, it does not do a deep copy
153  *
154  * Return value: a new #rasqal_service or NULL on failure.
155  **/
156 rasqal_service*
rasqal_new_service_from_service(rasqal_service * svc)157 rasqal_new_service_from_service(rasqal_service* svc)
158 {
159   if(!svc)
160     return NULL;
161 
162   svc->usage++;
163 
164   return svc;
165 }
166 
167 
168 /**
169  * rasqal_free_service:
170  * @svc: #rasqal_service object
171  *
172  * Destructor - destroy a #rasqal_service object.
173  **/
174 void
rasqal_free_service(rasqal_service * svc)175 rasqal_free_service(rasqal_service* svc)
176 {
177   if(!svc)
178     return;
179 
180   if(--svc->usage)
181     return;
182 
183   if(svc->service_uri)
184     raptor_free_uri(svc->service_uri);
185 
186   if(svc->query_string)
187     RASQAL_FREE(char*, svc->query_string);
188 
189   if(svc->data_graphs)
190     raptor_free_sequence(svc->data_graphs);
191 
192   rasqal_service_set_www(svc, NULL);
193 
194   RASQAL_FREE(rasqal_service, svc);
195 }
196 
197 
198 /**
199  * rasqal_service_set_www:
200  * @svc: #rasqal_service service object
201  * @www: WWW object (or NULL)
202  *
203  * Set the WWW object to use when executing the service
204  *
205  * Return value: non 0 on failure
206  **/
207 int
rasqal_service_set_www(rasqal_service * svc,raptor_www * www)208 rasqal_service_set_www(rasqal_service* svc, raptor_www* www)
209 {
210   if(svc->www)
211     raptor_free_www(svc->www);
212 
213   svc->www = www;
214 
215   return 0;
216 }
217 
218 
219 /**
220  * rasqal_service_set_format:
221  * @svc: #rasqal_service service object
222  * @format: service mime type (or NULL)
223  *
224  * Set the MIME Type to use in HTTP Accept when executing the service
225  *
226  * Return value: non 0 on failure
227  **/
228 int
rasqal_service_set_format(rasqal_service * svc,const char * format)229 rasqal_service_set_format(rasqal_service* svc, const char *format)
230 {
231   size_t len;
232 
233   if(svc->format) {
234     RASQAL_FREE(char*, svc->format);
235     svc->format = NULL;
236   }
237 
238   if(!format)
239     return 0;
240 
241   len = strlen(format);
242   svc->format = RASQAL_MALLOC(char*, len + 1);
243   if(!svc->format)
244     return 1;
245 
246   memcpy(svc->format, format, len + 1);
247 
248   return 0;
249 }
250 
251 
252 static void
rasqal_service_write_bytes(raptor_www * www,void * userdata,const void * ptr,size_t size,size_t nmemb)253 rasqal_service_write_bytes(raptor_www* www,
254                            void *userdata, const void *ptr,
255                            size_t size, size_t nmemb)
256 {
257   rasqal_service* svc = (rasqal_service*)userdata;
258   size_t len = size * nmemb;
259 
260   if(!svc->started) {
261     svc->final_uri = raptor_www_get_final_uri(www);
262     svc->started = 1;
263   }
264 
265   raptor_stringbuffer_append_counted_string(svc->sb,
266                                             RASQAL_GOOD_CAST(const unsigned char*, ptr),
267                                             len, 1);
268 }
269 
270 
271 static void
rasqal_service_content_type_handler(raptor_www * www,void * userdata,const char * content_type)272 rasqal_service_content_type_handler(raptor_www* www, void* userdata,
273                                     const char* content_type)
274 {
275   rasqal_service* svc = (rasqal_service*)userdata;
276   size_t len;
277 
278   if(svc->content_type)
279     RASQAL_FREE(char*, svc->content_type);
280 
281   len = strlen(content_type);
282   svc->content_type = RASQAL_MALLOC(char*, len + 1);
283 
284   if(svc->content_type) {
285     char* p;
286 
287     memcpy(svc->content_type, content_type, len + 1);
288 
289     for(p = svc->content_type; *p; p++) {
290       if(*p == ';' || *p == ' ') {
291         *p = '\0';
292         break;
293       }
294     }
295   }
296 
297 }
298 
299 
300 /**
301  * rasqal_service_execute_as_rowsource:
302  * @svc: rasqal service
303  *
304  * INTERNAL - Execute a rasqal sparql protocol service to a rowsurce
305  *
306  * Return value: query results or NULL on failure
307  */
308 rasqal_rowsource*
rasqal_service_execute_as_rowsource(rasqal_service * svc,rasqal_variables_table * vars_table)309 rasqal_service_execute_as_rowsource(rasqal_service* svc,
310                                     rasqal_variables_table* vars_table)
311 {
312   raptor_iostream* read_iostr = NULL;
313   raptor_uri* read_base_uri = NULL;
314   rasqal_query_results_formatter* read_formatter = NULL;
315   raptor_uri* retrieval_uri = NULL;
316   raptor_stringbuffer* uri_sb = NULL;
317   size_t len;
318   unsigned char* str;
319   raptor_world* raptor_world_ptr = rasqal_world_get_raptor(svc->world);
320   rasqal_rowsource* rowsource = NULL;
321 
322   if(!svc->www) {
323     svc->www = raptor_new_www(raptor_world_ptr);
324 
325     if(!svc->www) {
326       rasqal_log_error_simple(svc->world, RAPTOR_LOG_LEVEL_ERROR, NULL,
327                               "Failed to create WWW");
328       goto error;
329     }
330   }
331 
332   svc->started = 0;
333   svc->final_uri = NULL;
334   svc->sb = raptor_new_stringbuffer();
335   svc->content_type = NULL;
336 
337   if(svc->format)
338     raptor_www_set_http_accept(svc->www, svc->format);
339   else
340     raptor_www_set_http_accept(svc->www, DEFAULT_FORMAT);
341 
342   raptor_www_set_write_bytes_handler(svc->www,
343                                      rasqal_service_write_bytes, svc);
344   raptor_www_set_content_type_handler(svc->www,
345                                       rasqal_service_content_type_handler, svc);
346 
347 
348   /* Construct a URI to retrieve following SPARQL protocol HTTP
349    *  binding from concatenation of
350    *
351    * 1. service_uri
352    * 2. '?'
353    * 3. "query=" query_string
354    * 4. "&default-graph-uri=" background graph URI if any
355    * 5. "&named-graph-uri=" named graph URI for all named graphs
356    * with URI-escaping of the values
357    */
358 
359   uri_sb = raptor_new_stringbuffer();
360   if(!uri_sb) {
361     rasqal_log_error_simple(svc->world, RAPTOR_LOG_LEVEL_ERROR, NULL,
362                             "Failed to create stringbuffer");
363     goto error;
364   }
365 
366   str = raptor_uri_as_counted_string(svc->service_uri, &len);
367   raptor_stringbuffer_append_counted_string(uri_sb, str, len, 1);
368 
369   raptor_stringbuffer_append_counted_string(uri_sb,
370                                             RASQAL_GOOD_CAST(const unsigned char*, "?"), 1, 1);
371 
372   if(svc->query_string) {
373     raptor_stringbuffer_append_counted_string(uri_sb,
374                                               RASQAL_GOOD_CAST(const unsigned char*, "query="), 6, 1);
375     raptor_stringbuffer_append_uri_escaped_counted_string(uri_sb,
376                                                           svc->query_string,
377                                                           svc->query_string_len,
378                                                           1);
379   }
380 
381 
382   if(svc->data_graphs) {
383     rasqal_data_graph* dg;
384     int i;
385     int bg_graph_count;
386 
387     for(i = 0, bg_graph_count = 0;
388         (dg = (rasqal_data_graph*)raptor_sequence_get_at(svc->data_graphs, i));
389         i++) {
390       unsigned char* graph_str;
391       size_t graph_len;
392       raptor_uri* graph_uri;
393 
394       if(dg->flags & RASQAL_DATA_GRAPH_BACKGROUND) {
395 
396         if(bg_graph_count++) {
397           if(bg_graph_count == 2) {
398             /* Warn once, only when the second BG is seen */
399             rasqal_log_warning_simple(svc->world,
400                                       RASQAL_WARNING_LEVEL_MULTIPLE_BG_GRAPHS,
401                                       NULL,
402                                       "Attempted to add multiple background graphs");
403           }
404           /* always skip after first BG */
405           continue;
406         }
407 
408         raptor_stringbuffer_append_counted_string(uri_sb,
409                                                   RASQAL_GOOD_CAST(const unsigned char*, "&default-graph-uri="), 19, 1);
410         graph_uri = dg->uri;
411       } else {
412         raptor_stringbuffer_append_counted_string(uri_sb,
413                                                   RASQAL_GOOD_CAST(const unsigned char*, "&named-graph-uri="), 17, 1);
414         graph_uri = dg->name_uri;
415       }
416 
417       graph_str = raptor_uri_as_counted_string(graph_uri, &graph_len);
418       raptor_stringbuffer_append_uri_escaped_counted_string(uri_sb,
419                                                             RASQAL_GOOD_CAST(const char*, graph_str), graph_len, 1);
420     }
421   }
422 
423 
424   str = raptor_stringbuffer_as_string(uri_sb);
425 
426   retrieval_uri = raptor_new_uri(raptor_world_ptr, str);
427   if(!retrieval_uri) {
428     rasqal_log_error_simple(svc->world, RAPTOR_LOG_LEVEL_ERROR, NULL,
429                             "Failed to create retrieval URI %s",
430                             raptor_uri_as_string(retrieval_uri));
431     goto error;
432   }
433 
434   raptor_free_stringbuffer(uri_sb); uri_sb = NULL;
435 
436   if(raptor_www_fetch(svc->www, retrieval_uri)) {
437     rasqal_log_error_simple(svc->world, RAPTOR_LOG_LEVEL_ERROR, NULL,
438                             "Failed to fetch retrieval URI %s",
439                             raptor_uri_as_string(retrieval_uri));
440     goto error;
441   }
442 
443   /* Takes ownership of svc->sb */
444   read_iostr = rasqal_new_iostream_from_stringbuffer(raptor_world_ptr,
445                                                      svc->sb);
446   svc->sb = NULL;
447   if(!read_iostr) {
448     rasqal_log_error_simple(svc->world, RAPTOR_LOG_LEVEL_ERROR, NULL,
449                             "Failed to create iostream from string");
450     goto error;
451   }
452 
453   read_base_uri = svc->final_uri ? svc->final_uri : svc->service_uri;
454   read_formatter = rasqal_new_query_results_formatter(svc->world,
455                                                       /* format name */ NULL,
456                                                       svc->content_type,
457                                                       /* format URI */ NULL);
458   if(!read_formatter) {
459     rasqal_log_error_simple(svc->world, RAPTOR_LOG_LEVEL_ERROR, NULL,
460                             "Failed to find query results reader for content type %s returned from %s",
461                             svc->content_type,
462                             raptor_uri_as_string(read_base_uri));
463     goto error;
464   }
465 
466   /* Takes ownership of read_iostr with flags = 1 */
467   rowsource = rasqal_query_results_formatter_get_read_rowsource(svc->world,
468                                                                 read_iostr,
469                                                                 read_formatter,
470                                                                 vars_table,
471                                                                 read_base_uri,
472                                                                 /* flags */ 1);
473   read_iostr = NULL;
474   if(!rowsource) {
475     rasqal_log_error_simple(svc->world, RAPTOR_LOG_LEVEL_ERROR, NULL,
476                             "Failed to decode %s query results data returned from %s",
477                             svc->content_type,
478                             raptor_uri_as_string(read_base_uri));
479     goto error;
480   }
481 
482 
483   error:
484   if(retrieval_uri)
485     raptor_free_uri(retrieval_uri);
486 
487   if(uri_sb)
488     raptor_free_stringbuffer(uri_sb);
489 
490   if(read_formatter)
491     rasqal_free_query_results_formatter(read_formatter);
492 
493   if(read_iostr)
494     raptor_free_iostream(read_iostr);
495 
496   if(svc->final_uri) {
497     raptor_free_uri(svc->final_uri);
498     svc->final_uri = NULL;
499   }
500 
501   if(svc->content_type) {
502     RASQAL_FREE(char*, svc->content_type);
503     svc->content_type = NULL;
504   }
505 
506   if(svc->sb) {
507     raptor_free_stringbuffer(svc->sb);
508     svc->sb = NULL;
509   }
510 
511   return rowsource;
512 }
513 
514 
515 /**
516  * rasqal_service_execute:
517  * @svc: rasqal service
518  *
519  * Execute a rasqal sparql protocol service
520  *
521  * Return value: query results or NULL on failure
522  */
523 rasqal_query_results*
rasqal_service_execute(rasqal_service * svc)524 rasqal_service_execute(rasqal_service* svc)
525 {
526   rasqal_query_results* results = NULL;
527   rasqal_variables_table* vars_table;
528   rasqal_rowsource* rowsource = NULL;
529 
530   results = rasqal_new_query_results2(svc->world, NULL,
531                                       RASQAL_QUERY_RESULTS_BINDINGS);
532 
533   if(!results) {
534     rasqal_log_error_simple(svc->world, RAPTOR_LOG_LEVEL_ERROR, NULL,
535                             "Failed to create query results");
536     goto error;
537   }
538 
539   vars_table = rasqal_query_results_get_variables_table(results);
540 
541   rowsource = rasqal_service_execute_as_rowsource(svc, vars_table);
542   if(!rowsource)
543     goto error;
544 
545   while(1) {
546     rasqal_row* row = rasqal_rowsource_read_row(rowsource);
547     if(!row)
548       break;
549     rasqal_query_results_add_row(results, row);
550   }
551 
552 
553   error:
554   if(rowsource)
555     rasqal_free_rowsource(rowsource);
556 
557   return results;
558 }
559