1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * rasqal_format_sv.c - Format results in CSV/TSV
4  *
5  * Intended to read and write the
6  *   SPARQL 1.1 Query Results CSV and TSV Formats (DRAFT)
7  *   http://www.w3.org/2009/sparql/docs/csv-tsv-results/results-csv-tsv.html
8  *
9  * Copyright (C) 2009-2011, David Beckett http://www.dajobe.org/
10  *
11  * This package is Free Software and part of Redland http://librdf.org/
12  *
13  * It is licensed under the following three licenses as alternatives:
14  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
15  *   2. GNU General Public License (GPL) V2 or any newer version
16  *   3. Apache License, V2.0 or any newer version
17  *
18  * You may not use this file except in compliance with at least one of
19  * the above three licenses.
20  *
21  * See LICENSE.html or LICENSE.txt at the top of this package for the
22  * complete terms and further detail along with the license texts for
23  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
24  *
25  *
26  */
27 
28 #ifdef HAVE_CONFIG_H
29 #include <rasqal_config.h>
30 #endif
31 
32 #ifdef WIN32
33 #include <win32_rasqal_config.h>
34 #endif
35 
36 #include <stdio.h>
37 #include <string.h>
38 #ifdef HAVE_STDLIB_H
39 #include <stdlib.h>
40 #endif
41 #include <stdarg.h>
42 
43 #ifndef FILE_READ_BUF_SIZE
44 #ifdef BUFSIZ
45 #define FILE_READ_BUF_SIZE BUFSIZ
46 #else
47 #define FILE_READ_BUF_SIZE 1024
48 #endif
49 #endif
50 
51 #include "rasqal.h"
52 #include "rasqal_internal.h"
53 
54 #include "sv_config.h"
55 
56 #include "sv.h"
57 
58 static int
rasqal_iostream_write_csv_string(const unsigned char * string,size_t len,raptor_iostream * iostr)59 rasqal_iostream_write_csv_string(const unsigned char *string, size_t len,
60                                  raptor_iostream *iostr)
61 {
62   const char delim = '\x22';
63   int quoting_needed = 0;
64   size_t i;
65 
66   for(i = 0; i < len; i++) {
67     char c = RASQAL_GOOD_CAST(char, string[i]);
68     /* Quoting needed for delim (double quote), comma, linefeed or return */
69     if(c == delim   || c == ',' || c == '\r' || c == '\n') {
70       quoting_needed++;
71       break;
72     }
73   }
74   if(!quoting_needed)
75     return raptor_iostream_counted_string_write(string, len, iostr);
76 
77   raptor_iostream_write_byte(delim, iostr);
78   for(i = 0; i < len; i++) {
79     char c = RASQAL_GOOD_CAST(char, string[i]);
80     if(c == delim)
81       raptor_iostream_write_byte(delim, iostr);
82     raptor_iostream_write_byte(c, iostr);
83   }
84   raptor_iostream_write_byte(delim, iostr);
85 
86   return 0;
87 }
88 
89 /*
90  * rasqal_query_results_write_sv:
91  * @iostr: #raptor_iostream to write the query to
92  * @results: #rasqal_query_results query results format
93  * @base_uri: #raptor_uri base URI of the output format
94  * @label: name of this format for errors
95  * @sep: column sep character
96  * @csv_escape: non-0 if values are written escaped with CSV rules, else turtle
97  * @variable_prefix: char to print before a variable name or NUL
98  * @eol_str: end of line string
99  * @eol_str_len: length of @eol_str
100  *
101  * INTERNAL - Write a @sep-separated values version of the query results format to an iostream.
102  *
103  * If the writing succeeds, the query results will be exhausted.
104  *
105  * Return value: non-0 on failure
106  **/
107 static int
rasqal_query_results_write_sv(raptor_iostream * iostr,rasqal_query_results * results,raptor_uri * base_uri,const char * label,const char sep,int csv_escape,const char variable_prefix,const char * eol_str,size_t eol_str_len)108 rasqal_query_results_write_sv(raptor_iostream *iostr,
109                               rasqal_query_results* results,
110                               raptor_uri *base_uri,
111                               const char* label,
112                               const char sep,
113                               int csv_escape,
114                               const char variable_prefix,
115                               const char* eol_str,
116                               size_t eol_str_len)
117 {
118   rasqal_query* query = rasqal_query_results_get_query(results);
119   int i;
120   int vars_count;
121   int emit_mkr;
122 
123   if(!strcmp(label, (const char*)"mkr"))
124     emit_mkr = 1;
125   else
126     emit_mkr = 0;
127 
128   if(!rasqal_query_results_is_bindings(results)) {
129     rasqal_log_error_simple(query->world, RAPTOR_LOG_LEVEL_ERROR,
130                             &query->locator,
131                             "Can only write %s format for variable binding results",
132                             label);
133     return 1;
134   }
135 
136   if(emit_mkr) {
137     raptor_iostream_counted_string_write("result is relation with format = csv;\n", 38, iostr);
138     raptor_iostream_counted_string_write("begin relation result;\n", 23, iostr);
139   }
140 
141   /* Header */
142   for(i = 0; 1; i++) {
143     const unsigned char *name;
144 
145     name = rasqal_query_results_get_binding_name(results, i);
146     if(!name)
147       break;
148 
149     if(i > 0)
150       raptor_iostream_write_byte(sep, iostr);
151 
152     if(variable_prefix)
153       raptor_iostream_write_byte(variable_prefix, iostr);
154     raptor_iostream_string_write(name, iostr);
155   }
156   if(emit_mkr)
157     raptor_iostream_counted_string_write(";", 1, iostr);
158   raptor_iostream_counted_string_write(eol_str, eol_str_len, iostr);
159 
160 
161   /* Variable Binding Results */
162   vars_count = rasqal_query_results_get_bindings_count(results);
163   while(!rasqal_query_results_finished(results)) {
164     /* Result row */
165     for(i = 0; i < vars_count; i++) {
166       rasqal_literal *l = rasqal_query_results_get_binding_value(results, i);
167 
168       if(i > 0)
169         raptor_iostream_write_byte(sep, iostr);
170 
171       if(l) {
172         const unsigned char* str;
173         size_t len;
174 
175         switch(l->type) {
176           case RASQAL_LITERAL_URI:
177             str = RASQAL_GOOD_CAST(const unsigned char*, raptor_uri_as_counted_string(l->value.uri, &len));
178             if(csv_escape)
179               rasqal_iostream_write_csv_string(str, len, iostr);
180             else {
181               raptor_iostream_write_byte('<', iostr);
182               if(str && len > 0)
183                 raptor_string_ntriples_write(str, len, '"', iostr);
184               raptor_iostream_write_byte('>', iostr);
185             }
186             break;
187 
188           case RASQAL_LITERAL_BLANK:
189             raptor_bnodeid_ntriples_write(l->string, l->string_len, iostr);
190             break;
191 
192           case RASQAL_LITERAL_STRING:
193             if(csv_escape) {
194               rasqal_iostream_write_csv_string(l->string, l->string_len, iostr);
195             } else {
196               if(l->datatype && l->valid) {
197                 rasqal_literal_type ltype;
198                 ltype = rasqal_xsd_datatype_uri_to_type(l->world, l->datatype);
199 
200                 if(ltype >= RASQAL_LITERAL_INTEGER &&
201                    ltype <= RASQAL_LITERAL_DECIMAL) {
202                   /* write integer, float, double and decimal XSD typed
203                    * data without quotes, datatype or language
204                    */
205                   raptor_string_ntriples_write(l->string, l->string_len, '\0', iostr);
206                   break;
207                 }
208               }
209 
210               raptor_iostream_write_byte('"', iostr);
211               raptor_string_ntriples_write(l->string, l->string_len, '"', iostr);
212               raptor_iostream_write_byte('"', iostr);
213 
214               if(l->language) {
215                 raptor_iostream_write_byte('@', iostr);
216                 raptor_iostream_string_write(RASQAL_GOOD_CAST(const unsigned char*, l->language), iostr);
217               }
218 
219               if(l->datatype) {
220                 raptor_iostream_string_write("^^<", iostr);
221                 str = RASQAL_GOOD_CAST(const unsigned char*, raptor_uri_as_counted_string(l->datatype, &len));
222                 raptor_string_ntriples_write(str, len, '"', iostr);
223                 raptor_iostream_write_byte('>', iostr);
224               }
225             }
226 
227             break;
228 
229           case RASQAL_LITERAL_PATTERN:
230           case RASQAL_LITERAL_QNAME:
231           case RASQAL_LITERAL_INTEGER:
232           case RASQAL_LITERAL_XSD_STRING:
233           case RASQAL_LITERAL_BOOLEAN:
234           case RASQAL_LITERAL_DOUBLE:
235           case RASQAL_LITERAL_FLOAT:
236           case RASQAL_LITERAL_VARIABLE:
237           case RASQAL_LITERAL_DECIMAL:
238           case RASQAL_LITERAL_DATE:
239           case RASQAL_LITERAL_DATETIME:
240           case RASQAL_LITERAL_UDT:
241           case RASQAL_LITERAL_INTEGER_SUBTYPE:
242 
243           case RASQAL_LITERAL_UNKNOWN:
244           default:
245             rasqal_log_error_simple(query->world, RAPTOR_LOG_LEVEL_ERROR,
246                                     &query->locator,
247                                     "Cannot turn literal type %u into %s",
248                                     l->type, label);
249         }
250       }
251 
252       /* End Binding */
253     }
254 
255     /* End Result Row */
256     if(emit_mkr)
257       raptor_iostream_counted_string_write(";", 1, iostr);
258     raptor_iostream_counted_string_write(eol_str, eol_str_len, iostr);
259 
260     rasqal_query_results_next(results);
261   }
262   if(emit_mkr)
263     raptor_iostream_counted_string_write("end relation result;\n", 21, iostr);
264 
265   /* end sparql */
266   return 0;
267 }
268 
269 
270 static int
rasqal_query_results_write_csv(rasqal_query_results_formatter * formatter,raptor_iostream * iostr,rasqal_query_results * results,raptor_uri * base_uri)271 rasqal_query_results_write_csv(rasqal_query_results_formatter* formatter,
272                                raptor_iostream *iostr,
273                                rasqal_query_results* results,
274                                raptor_uri *base_uri)
275 {
276   return rasqal_query_results_write_sv(iostr, results, base_uri,
277                                        "CSV", ',', 1, '\0',
278                                        "\r\n", 2);
279 }
280 
281 
282 static int
rasqal_query_results_write_mkr(rasqal_query_results_formatter * formatter,raptor_iostream * iostr,rasqal_query_results * results,raptor_uri * base_uri)283 rasqal_query_results_write_mkr(rasqal_query_results_formatter* formatter,
284                                raptor_iostream *iostr,
285                                rasqal_query_results* results,
286                                raptor_uri *base_uri)
287 {
288   return rasqal_query_results_write_sv(iostr, results, base_uri,
289                                        "mkr", ',', 1, '\0',
290                                        "\n", 1);
291 }
292 
293 
294 static int
rasqal_query_results_write_tsv(rasqal_query_results_formatter * formatter,raptor_iostream * iostr,rasqal_query_results * results,raptor_uri * base_uri)295 rasqal_query_results_write_tsv(rasqal_query_results_formatter* formatter,
296                                raptor_iostream *iostr,
297                                rasqal_query_results* results,
298                                raptor_uri *base_uri)
299 {
300   return rasqal_query_results_write_sv(iostr, results, base_uri,
301                                        "TSV", '\t', 0, '?',
302                                        "\n", 1);
303 }
304 
305 
306 
307 typedef struct
308 {
309   rasqal_world* world;
310   rasqal_rowsource* rowsource;
311 
312   int failed;
313 
314   /* Input fields */
315   raptor_uri* base_uri;
316   raptor_iostream* iostr;
317 
318   raptor_locator locator;
319 
320   /* SV processing */
321   int emit_mkr;  /* Non 0 for mKR relation */
322   char sep;
323   sv* t;
324   char buffer[FILE_READ_BUF_SIZE]; /* iostream read buffer */
325   int offset; /* current result row number */
326 
327   /* Output fields */
328   raptor_sequence* results_sequence; /* saved result rows */
329 
330   /* Variables table allocated for variables in the result set */
331   rasqal_variables_table* vars_table;
332   size_t variables_count;
333 
334   unsigned int flags;
335 
336   int data_is_turtle;
337 } rasqal_rowsource_sv_context;
338 
339 
340 static sv_status_t
rasqal_rowsource_sv_header_callback(sv * t,void * user_data,char ** fields,size_t * widths,size_t count)341 rasqal_rowsource_sv_header_callback(sv *t, void *user_data,
342                                     char** fields, size_t *widths,
343                                     size_t count)
344 {
345   rasqal_rowsource_sv_context* con;
346   unsigned i;
347 
348   con = (rasqal_rowsource_sv_context*)user_data;
349 
350   con->variables_count = count;
351 
352   for(i = 0; i < count; i++) {
353     rasqal_variable *v;
354     char *p = fields[i];
355     size_t len = widths[i];
356 
357     if(*p == '?') {
358       p++;
359       len--;
360     }
361 
362     v = rasqal_variables_table_add2(con->vars_table,
363                                     RASQAL_VARIABLE_TYPE_NORMAL,
364                                     RASQAL_GOOD_CAST(const unsigned char*, p),
365                                     len, NULL);
366     if(v) {
367       rasqal_rowsource_add_variable(con->rowsource, v);
368       /* above function takes a reference to v */
369       rasqal_free_variable(v);
370     }
371   }
372 
373   return SV_STATUS_OK;
374 }
375 
376 
377 static sv_status_t
rasqal_rowsource_sv_data_callback(sv * t,void * user_data,char ** fields,size_t * widths,size_t count)378 rasqal_rowsource_sv_data_callback(sv *t, void *user_data,
379                                   char** fields, size_t *widths,
380                                   size_t count)
381 {
382   rasqal_rowsource_sv_context* con;
383   rasqal_row* row;
384   unsigned i;
385 
386   con = (rasqal_rowsource_sv_context*)user_data;
387 
388   row = rasqal_new_row(con->rowsource);
389   if(!row)
390     goto fail;
391 
392   RASQAL_DEBUG2("Made new row %d\n", con->offset);
393   con->offset++;
394 
395   for(i = 0; i < count; i++) {
396     char* field = fields[i];
397     size_t field_len = widths[i];
398     rasqal_literal* l;
399 
400     if(!field_len) {
401       /* missing */
402       l = NULL;
403     } else if(con->data_is_turtle) {
404       l = rasqal_new_literal_from_ntriples_counted_string(con->world,
405                                                           RASQAL_GOOD_CAST(unsigned char*,field),
406                                                           field_len);
407       if(!l)
408         goto fail;
409     } else {
410       unsigned char* lvalue;
411 
412       lvalue = RASQAL_MALLOC(unsigned char*, field_len + 1);
413       if(!lvalue)
414         goto fail;
415 
416       if(!widths[i])
417         *lvalue = '\0';
418       else
419         memcpy(lvalue, field, field_len + 1);
420 
421       l = rasqal_new_string_literal_node(con->world, lvalue, NULL, NULL);
422       if(!l)
423         goto fail;
424     }
425 
426     rasqal_row_set_value_at(row, RASQAL_GOOD_CAST(int, i), l);
427     if(l) {
428       RASQAL_DEBUG4("Saving row result %d %s value at offset %u\n",
429                     con->offset, rasqal_literal_type_label(l->type), i);
430       rasqal_free_literal(l);
431     } else {
432       RASQAL_DEBUG3("Saving row result %d NULL value at offset %u\n",
433                     con->offset, i);
434     }
435   }
436   raptor_sequence_push(con->results_sequence, row);
437 
438   return SV_STATUS_OK;
439 
440   fail:
441   rasqal_free_row(row);
442   return SV_STATUS_NO_MEMORY;
443 }
444 
445 
446 static int
rasqal_rowsource_sv_init(rasqal_rowsource * rowsource,void * user_data)447 rasqal_rowsource_sv_init(rasqal_rowsource* rowsource, void *user_data)
448 {
449   rasqal_rowsource_sv_context* con;
450 
451   con = (rasqal_rowsource_sv_context*)user_data;
452 
453   con->rowsource = rowsource;
454 
455   con->t = sv_new(con,
456                   rasqal_rowsource_sv_header_callback,
457                   rasqal_rowsource_sv_data_callback,
458                   con->sep);
459   if(!con->t)
460     return 1;
461 
462   if(con->data_is_turtle)
463     sv_set_option(con->t, SV_OPTION_QUOTED_FIELDS, 0L);
464 
465   return 0;
466 }
467 
468 
469 static int
rasqal_rowsource_sv_finish(rasqal_rowsource * rowsource,void * user_data)470 rasqal_rowsource_sv_finish(rasqal_rowsource* rowsource, void *user_data)
471 {
472   rasqal_rowsource_sv_context* con;
473 
474   con = (rasqal_rowsource_sv_context*)user_data;
475 
476   if(con->t)
477     sv_free(con->t);
478 
479   if(con->base_uri)
480     raptor_free_uri(con->base_uri);
481 
482   if(con->results_sequence)
483     raptor_free_sequence(con->results_sequence);
484 
485   if(con->vars_table)
486     rasqal_free_variables_table(con->vars_table);
487 
488   if(con->flags) {
489     if(con->iostr)
490       raptor_free_iostream(con->iostr);
491   }
492 
493   RASQAL_FREE(rasqal_rowsource_sv_context, con);
494 
495   return 0;
496 }
497 
498 
499 static void
rasqal_rowsource_sv_process(rasqal_rowsource_sv_context * con)500 rasqal_rowsource_sv_process(rasqal_rowsource_sv_context* con)
501 {
502   if(raptor_sequence_size(con->results_sequence) && con->variables_count > 0)
503     return;
504 
505   /* do some parsing - need some results */
506   while(!raptor_iostream_read_eof(con->iostr)) {
507     size_t read_len;
508 
509     read_len = RASQAL_BAD_CAST(size_t,
510                                raptor_iostream_read_bytes(RASQAL_GOOD_CAST(char*, con->buffer), 1,
511                                                           FILE_READ_BUF_SIZE,
512                                                           con->iostr));
513     if(read_len > 0) {
514       sv_status_t status;
515 
516       RASQAL_DEBUG2("processing %d bytes\n", RASQAL_GOOD_CAST(int, read_len));
517 
518       status = sv_parse_chunk(con->t, con->buffer, read_len);
519       if(status != SV_STATUS_OK) {
520         con->failed++;
521         break;
522       }
523     }
524 
525     if(read_len < FILE_READ_BUF_SIZE) {
526       /* finished */
527       break;
528     }
529 
530     /* end with variables sequence done AND at least one row */
531     if(con->variables_count > 0 &&
532        raptor_sequence_size(con->results_sequence) > 0)
533       break;
534   }
535 }
536 
537 
538 static int
rasqal_rowsource_sv_ensure_variables(rasqal_rowsource * rowsource,void * user_data)539 rasqal_rowsource_sv_ensure_variables(rasqal_rowsource* rowsource,
540                                              void *user_data)
541 {
542   rasqal_rowsource_sv_context* con;
543 
544   con = (rasqal_rowsource_sv_context*)user_data;
545 
546   rasqal_rowsource_sv_process(con);
547 
548   return con->failed;
549 }
550 
551 
552 static rasqal_row*
rasqal_rowsource_sv_read_row(rasqal_rowsource * rowsource,void * user_data)553 rasqal_rowsource_sv_read_row(rasqal_rowsource* rowsource,
554                                      void *user_data)
555 {
556   rasqal_rowsource_sv_context* con;
557   rasqal_row* row=NULL;
558 
559   con = (rasqal_rowsource_sv_context*)user_data;
560 
561   rasqal_rowsource_sv_process(con);
562 
563   if(!con->failed && raptor_sequence_size(con->results_sequence) > 0) {
564     RASQAL_DEBUG1("getting row from stored sequence\n");
565     row=(rasqal_row*)raptor_sequence_unshift(con->results_sequence);
566   }
567 
568   return row;
569 }
570 
571 
572 
573 
574 
575 static const rasqal_rowsource_handler rasqal_rowsource_csv_handler={
576   /* .version = */ 1,
577   "CSV",
578   /* .init = */ rasqal_rowsource_sv_init,
579   /* .finish = */ rasqal_rowsource_sv_finish,
580   /* .ensure_variables = */ rasqal_rowsource_sv_ensure_variables,
581   /* .read_row = */ rasqal_rowsource_sv_read_row,
582   /* .read_all_rows = */ NULL,
583   /* .reset = */ NULL,
584   /* .set_requirements = */ NULL,
585   /* .get_inner_rowsource = */ NULL,
586   /* .set_origin = */ NULL,
587 };
588 
589 static const rasqal_rowsource_handler rasqal_rowsource_mkr_handler={
590   /* .version = */ 1,
591   "mkr",
592   /* .init = */ rasqal_rowsource_sv_init,
593   /* .finish = */ rasqal_rowsource_sv_finish,
594   /* .ensure_variables = */ rasqal_rowsource_sv_ensure_variables,
595   /* .read_row = */ rasqal_rowsource_sv_read_row,
596   /* .read_all_rows = */ NULL,
597   /* .reset = */ NULL,
598   /* .set_requirements = */ NULL,
599   /* .get_inner_rowsource = */ NULL,
600   /* .set_origin = */ NULL,
601 };
602 
603 static const rasqal_rowsource_handler rasqal_rowsource_tsv_handler={
604   /* .version = */ 1,
605   "TSV",
606   /* .init = */ rasqal_rowsource_sv_init,
607   /* .finish = */ rasqal_rowsource_sv_finish,
608   /* .ensure_variables = */ rasqal_rowsource_sv_ensure_variables,
609   /* .read_row = */ rasqal_rowsource_sv_read_row,
610   /* .read_all_rows = */ NULL,
611   /* .reset = */ NULL,
612   /* .set_requirements = */ NULL,
613   /* .get_inner_rowsource = */ NULL,
614   /* .set_origin = */ NULL,
615 };
616 
617 
618 
619 /*
620  * rasqal_query_results_getrowsource_csv:
621  * @world: rasqal world object
622  * @iostr: #raptor_iostream to read the query results from
623  * @base_uri: #raptor_uri base URI of the input format
624  *
625  * INTERNAL - Read SPARQL CSV query results format from an iostream
626  * in a format returning a rowsource.
627  *
628  * Return value: a new rasqal_rowsource or NULL on failure
629  **/
630 static rasqal_rowsource*
rasqal_query_results_get_rowsource_csv(rasqal_query_results_formatter * formatter,rasqal_world * world,rasqal_variables_table * vars_table,raptor_iostream * iostr,raptor_uri * base_uri,unsigned int flags)631 rasqal_query_results_get_rowsource_csv(rasqal_query_results_formatter* formatter,
632                                        rasqal_world *world,
633                                        rasqal_variables_table* vars_table,
634                                        raptor_iostream *iostr,
635                                        raptor_uri *base_uri,
636                                        unsigned int flags)
637 {
638   rasqal_rowsource_sv_context* con;
639 
640   con = RASQAL_CALLOC(rasqal_rowsource_sv_context*, 1, sizeof(*con));
641   if(!con)
642     return NULL;
643 
644   con->world = world;
645   con->base_uri = base_uri ? raptor_uri_copy(base_uri) : NULL;
646   con->iostr = iostr;
647 
648   con->locator.uri = base_uri;
649 
650   con->flags = flags;
651 
652   con->emit_mkr = 0;
653 
654   con->results_sequence = raptor_new_sequence((raptor_data_free_handler)rasqal_free_row, (raptor_data_print_handler)rasqal_row_print);
655 
656   con->vars_table = rasqal_new_variables_table_from_variables_table(vars_table);
657 
658   con->sep = ',';
659 
660   return rasqal_new_rowsource_from_handler(world, NULL,
661                                            con,
662                                            &rasqal_rowsource_csv_handler,
663                                            con->vars_table,
664                                            0);
665 }
666 
667 /*
668  * rasqal_query_results_getrowsource_mkr:
669  * @world: rasqal world object
670  * @iostr: #raptor_iostream to read the query results from
671  * @base_uri: #raptor_uri base URI of the input format
672  *
673  * INTERNAL - Read SPARQL mKR query results format from an iostream
674  * in a format returning a rowsource.
675  *
676  * Return value: a new rasqal_rowsource or NULL on failure
677  **/
678 static rasqal_rowsource*
rasqal_query_results_get_rowsource_mkr(rasqal_query_results_formatter * formatter,rasqal_world * world,rasqal_variables_table * vars_table,raptor_iostream * iostr,raptor_uri * base_uri,unsigned int flags)679 rasqal_query_results_get_rowsource_mkr(rasqal_query_results_formatter* formatter,
680                                        rasqal_world *world,
681                                        rasqal_variables_table* vars_table,
682                                        raptor_iostream *iostr,
683                                        raptor_uri *base_uri,
684                                        unsigned int flags)
685 {
686   rasqal_rowsource_sv_context* con;
687 
688   con = RASQAL_CALLOC(rasqal_rowsource_sv_context*, 1, sizeof(*con));
689   if(!con)
690     return NULL;
691 
692   con->world = world;
693   con->base_uri = base_uri ? raptor_uri_copy(base_uri) : NULL;
694   con->iostr = iostr;
695 
696   con->locator.uri = base_uri;
697 
698   con->flags = flags;
699 
700   con->emit_mkr = 1;
701 
702   con->results_sequence = raptor_new_sequence((raptor_data_free_handler)rasqal_free_row, (raptor_data_print_handler)rasqal_row_print);
703 
704   con->vars_table = rasqal_new_variables_table_from_variables_table(vars_table);
705 
706   con->sep = ',';
707 
708   return rasqal_new_rowsource_from_handler(world, NULL,
709                                            con,
710                                            &rasqal_rowsource_mkr_handler,
711                                            con->vars_table,
712                                            0);
713 }
714 
715 /*
716  * rasqal_query_results_getrowsource_tsv:
717  * @world: rasqal world object
718  * @iostr: #raptor_iostream to read the query results from
719  * @base_uri: #raptor_uri base URI of the input format
720  *
721  * INTERNAL - Read SPARQL TSV query results format from an iostream
722  * in a format returning a rowsource.
723  *
724  * Return value: a new rasqal_rowsource or NULL on failure
725  **/
726 static rasqal_rowsource*
rasqal_query_results_get_rowsource_tsv(rasqal_query_results_formatter * formatter,rasqal_world * world,rasqal_variables_table * vars_table,raptor_iostream * iostr,raptor_uri * base_uri,unsigned int flags)727 rasqal_query_results_get_rowsource_tsv(rasqal_query_results_formatter* formatter,
728                                        rasqal_world *world,
729                                        rasqal_variables_table* vars_table,
730                                        raptor_iostream *iostr,
731                                        raptor_uri *base_uri,
732                                        unsigned int flags)
733 {
734   rasqal_rowsource_sv_context* con;
735 
736   con = RASQAL_CALLOC(rasqal_rowsource_sv_context*, 1, sizeof(*con));
737   if(!con)
738     return NULL;
739 
740   con->world = world;
741   con->base_uri = base_uri ? raptor_uri_copy(base_uri) : NULL;
742   con->iostr = iostr;
743 
744   con->locator.uri = base_uri;
745 
746   con->flags = flags;
747 
748   con->emit_mkr = 0;
749 
750   con->results_sequence = raptor_new_sequence((raptor_data_free_handler)rasqal_free_row, (raptor_data_print_handler)rasqal_row_print);
751 
752   con->vars_table = rasqal_new_variables_table_from_variables_table(vars_table);
753 
754   con->sep = '\t';
755   con->data_is_turtle = 1;
756 
757   return rasqal_new_rowsource_from_handler(world, NULL,
758                                            con,
759                                            &rasqal_rowsource_tsv_handler,
760                                            con->vars_table,
761                                            0);
762 }
763 
764 
765 
/*
 * Score a buffer by the number of @sep characters in its first line.
 *
 * Only the first line is examined: scanning stops at the end of the
 * buffer or at the first NUL, CR or LF.  Reaching @min_count
 * separators gives the base score of 6; also reaching @boost_count
 * gives 8 and ends the scan early.
 *
 * Return value: 0, 6 or 8
 */
static int
rasqal_query_results_sv_score_first_line(const unsigned char* p, size_t len,
                                         const char sep,
                                         unsigned int min_count,
                                         unsigned int boost_count)
{
  unsigned int seps = 0;
  int score = 0;
  size_t pos;

  if(!p || !len)
    return 0;

  for(pos = 0; pos < len; pos++) {
    const unsigned char ch = p[pos];

    if(ch == '\0' || ch == '\r' || ch == '\n')
      break;

    if(ch != sep)
      continue;

    seps++;
    if(seps < min_count)
      continue;

    /* this is the highest score possible, so there is no need to go on */
    if(seps >= boost_count)
      return 8;

    score = 6;
  }

  return score;
}
800 
801 
802 static int
rasqal_query_results_csv_recognise_syntax(rasqal_query_results_format_factory * factory,const unsigned char * buffer,size_t len,const unsigned char * identifier,const unsigned char * suffix,const char * mime_type)803 rasqal_query_results_csv_recognise_syntax(rasqal_query_results_format_factory* factory,
804                                           const unsigned char *buffer,
805                                           size_t len,
806                                           const unsigned char *identifier,
807                                           const unsigned char *suffix,
808                                           const char *mime_type)
809 {
810   int score = 0;
811 
812   if(suffix && !strcmp(RASQAL_GOOD_CAST(const char*, suffix), "csv"))
813     return 7;
814 
815   if(buffer && len) {
816     /* use number of tabs in first line - comma needs higher counts since it
817      * is more likely to appear in text.
818      */
819     score = rasqal_query_results_sv_score_first_line(buffer, len, ',', 5, 7);
820   }
821 
822   return score;
823 }
824 
825 static int
rasqal_query_results_mkr_recognise_syntax(rasqal_query_results_format_factory * factory,const unsigned char * buffer,size_t len,const unsigned char * identifier,const unsigned char * suffix,const char * mime_type)826 rasqal_query_results_mkr_recognise_syntax(rasqal_query_results_format_factory* factory,
827                                           const unsigned char *buffer,
828                                           size_t len,
829                                           const unsigned char *identifier,
830                                           const unsigned char *suffix,
831                                           const char *mime_type)
832 {
833   unsigned int score = 0;
834 
835   if(suffix && !strcmp(RASQAL_GOOD_CAST(const char*, suffix), "mkr"))
836     return 7;
837 
838   if(buffer && len) {
839     /* use number of tabs in first line - comma needs higher counts since it
840      * is more likely to appear in text.
841      */
842     score = rasqal_query_results_sv_score_first_line(buffer, len, ',', 5, 7);
843   }
844 
845   return score;
846 }
847 
848 
849 static int
rasqal_query_results_tsv_recognise_syntax(rasqal_query_results_format_factory * factory,const unsigned char * buffer,size_t len,const unsigned char * identifier,const unsigned char * suffix,const char * mime_type)850 rasqal_query_results_tsv_recognise_syntax(rasqal_query_results_format_factory* factory,
851                                           const unsigned char *buffer,
852                                           size_t len,
853                                           const unsigned char *identifier,
854                                           const unsigned char *suffix,
855                                           const char *mime_type)
856 {
857   int score = 0;
858 
859   if(suffix && !strcmp(RASQAL_GOOD_CAST(const char*, suffix), "tsv"))
860     return 7;
861 
862   if(buffer && len) {
863     /* use number of tabs in first line - tab is more rare so guess
864      * with fewer than csv's comma. */
865     score = rasqal_query_results_sv_score_first_line(buffer, len, '\t', 3, 5);
866   }
867 
868   return score;
869 }
870 
871 
872 
873 
874 
875 static const char* const csv_names[] = { "csv", NULL};
876 
877 static const char* const csv_uri_strings[] = {
878   "http://www.w3.org/ns/formats/SPARQL_Results_CSV",
879   "http://www.w3.org/TR/sparql11-results-csv-tsv/",
880   "http://www.ietf.org/rfc/rfc4180.txt",
881   NULL
882 };
883 
884 static const raptor_type_q csv_types[] = {
885   { "text/csv", 8, 10},
886   { "text/csv; header=present", 24, 10},
887   { NULL, 0, 0}
888 };
889 
890 static int
rasqal_query_results_csv_register_factory(rasqal_query_results_format_factory * factory)891 rasqal_query_results_csv_register_factory(rasqal_query_results_format_factory *factory)
892 {
893   int rc = 0;
894 
895   factory->desc.names = csv_names;
896   factory->desc.mime_types = csv_types;
897 
898   factory->desc.label = "Comma Separated Values (CSV)";
899   factory->desc.uri_strings = csv_uri_strings;
900 
901   factory->desc.flags = 0;
902 
903   factory->write         = rasqal_query_results_write_csv;
904   factory->get_rowsource = rasqal_query_results_get_rowsource_csv;
905   factory->recognise_syntax = rasqal_query_results_csv_recognise_syntax;
906 
907   return rc;
908 }
909 
910 static const char* const mkr_names[] = { "mkr", NULL};
911 
912 static const char* const mkr_uri_strings[] = {
913   NULL
914 };
915 
916 static const raptor_type_q mkr_types[] = {
917   { "text/mkr", 8, 10},
918   { "text/mkr; header=present", 24, 10},
919   { NULL, 0, 0}
920 };
921 
922 static int
rasqal_query_results_mkr_register_factory(rasqal_query_results_format_factory * factory)923 rasqal_query_results_mkr_register_factory(rasqal_query_results_format_factory *factory)
924 {
925   int rc = 0;
926 
927   factory->desc.names = mkr_names;
928   factory->desc.mime_types = mkr_types;
929 
930   factory->desc.label = "mKR relation (mkr)";
931   factory->desc.uri_strings = mkr_uri_strings;
932 
933   factory->desc.flags = 0;
934 
935   factory->write         = rasqal_query_results_write_mkr;
936   factory->get_rowsource = rasqal_query_results_get_rowsource_mkr;
937   factory->recognise_syntax = rasqal_query_results_mkr_recognise_syntax;
938 
939   return rc;
940 }
941 
942 
943 static const char* const tsv_names[] = { "tsv", NULL};
944 
945 static const char* const tsv_uri_strings[] = {
946   "http://www.w3.org/ns/formats/SPARQL_Results_TSV",
947   "http://www.w3.org/TR/sparql11-results-csv-tsv/",
948   "http://www.iana.org/assignments/media-types/text/tab-separated-values",
949   NULL
950 };
951 
952 
953 static const raptor_type_q tsv_types[] = {
954   { "text/tab-separated-values", 25, 10},
955   { NULL, 0, 0}
956 };
957 
958 
959 static int
rasqal_query_results_tsv_register_factory(rasqal_query_results_format_factory * factory)960 rasqal_query_results_tsv_register_factory(rasqal_query_results_format_factory *factory)
961 {
962   int rc = 0;
963 
964   factory->desc.names = tsv_names;
965   factory->desc.mime_types = tsv_types;
966 
967   factory->desc.label = "Tab Separated Values (TSV)";
968   factory->desc.uri_strings = tsv_uri_strings;
969 
970   factory->desc.flags = 0;
971 
972   factory->write         = rasqal_query_results_write_tsv;
973   factory->get_rowsource = rasqal_query_results_get_rowsource_tsv;
974   factory->recognise_syntax = rasqal_query_results_tsv_recognise_syntax;
975 
976   return rc;
977 }
978 
979 
980 int
rasqal_init_result_format_sv(rasqal_world * world)981 rasqal_init_result_format_sv(rasqal_world* world)
982 {
983   if(!rasqal_world_register_query_results_format_factory(world,
984                                                          &rasqal_query_results_csv_register_factory))
985     return 1;
986 
987   if(!rasqal_world_register_query_results_format_factory(world,
988                                                          &rasqal_query_results_mkr_register_factory))
989     return 1;
990 
991   if(!rasqal_world_register_query_results_format_factory(world,
992                                                          &rasqal_query_results_tsv_register_factory))
993     return 1;
994 
995   return 0;
996 }
997