1 /* -*- Mode: c; c-basic-offset: 2 -*-
2 *
3 * rasqal_format_sv.c - Format results in CSV/TSV
4 *
5 * Intended to read and write the
6 * SPARQL 1.1 Query Results CSV and TSV Formats (DRAFT)
7 * http://www.w3.org/2009/sparql/docs/csv-tsv-results/results-csv-tsv.html
8 *
9 * Copyright (C) 2009-2011, David Beckett http://www.dajobe.org/
10 *
11 * This package is Free Software and part of Redland http://librdf.org/
12 *
13 * It is licensed under the following three licenses as alternatives:
14 * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
15 * 2. GNU General Public License (GPL) V2 or any newer version
16 * 3. Apache License, V2.0 or any newer version
17 *
18 * You may not use this file except in compliance with at least one of
19 * the above three licenses.
20 *
21 * See LICENSE.html or LICENSE.txt at the top of this package for the
22 * complete terms and further detail along with the license texts for
23 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
24 *
25 *
26 */
27
28 #ifdef HAVE_CONFIG_H
29 #include <rasqal_config.h>
30 #endif
31
32 #ifdef WIN32
33 #include <win32_rasqal_config.h>
34 #endif
35
36 #include <stdio.h>
37 #include <string.h>
38 #ifdef HAVE_STDLIB_H
39 #include <stdlib.h>
40 #endif
41 #include <stdarg.h>
42
43 #ifndef FILE_READ_BUF_SIZE
44 #ifdef BUFSIZ
45 #define FILE_READ_BUF_SIZE BUFSIZ
46 #else
47 #define FILE_READ_BUF_SIZE 1024
48 #endif
49 #endif
50
51 #include "rasqal.h"
52 #include "rasqal_internal.h"
53
54 #include "sv_config.h"
55
56 #include "sv.h"
57
58 static int
rasqal_iostream_write_csv_string(const unsigned char * string,size_t len,raptor_iostream * iostr)59 rasqal_iostream_write_csv_string(const unsigned char *string, size_t len,
60 raptor_iostream *iostr)
61 {
62 const char delim = '\x22';
63 int quoting_needed = 0;
64 size_t i;
65
66 for(i = 0; i < len; i++) {
67 char c = RASQAL_GOOD_CAST(char, string[i]);
68 /* Quoting needed for delim (double quote), comma, linefeed or return */
69 if(c == delim || c == ',' || c == '\r' || c == '\n') {
70 quoting_needed++;
71 break;
72 }
73 }
74 if(!quoting_needed)
75 return raptor_iostream_counted_string_write(string, len, iostr);
76
77 raptor_iostream_write_byte(delim, iostr);
78 for(i = 0; i < len; i++) {
79 char c = RASQAL_GOOD_CAST(char, string[i]);
80 if(c == delim)
81 raptor_iostream_write_byte(delim, iostr);
82 raptor_iostream_write_byte(c, iostr);
83 }
84 raptor_iostream_write_byte(delim, iostr);
85
86 return 0;
87 }
88
89 /*
90 * rasqal_query_results_write_sv:
91 * @iostr: #raptor_iostream to write the query to
92 * @results: #rasqal_query_results query results format
93 * @base_uri: #raptor_uri base URI of the output format
94 * @label: name of this format for errors
95 * @sep: column sep character
96 * @csv_escape: non-0 if values are written escaped with CSV rules, else turtle
97 * @variable_prefix: char to print before a variable name or NUL
98 * @eol_str: end of line string
99 * @eol_str_len: length of @eol_str
100 *
101 * INTERNAL - Write a @sep-separated values version of the query results format to an iostream.
102 *
103 * If the writing succeeds, the query results will be exhausted.
104 *
105 * Return value: non-0 on failure
106 **/
107 static int
rasqal_query_results_write_sv(raptor_iostream * iostr,rasqal_query_results * results,raptor_uri * base_uri,const char * label,const char sep,int csv_escape,const char variable_prefix,const char * eol_str,size_t eol_str_len)108 rasqal_query_results_write_sv(raptor_iostream *iostr,
109 rasqal_query_results* results,
110 raptor_uri *base_uri,
111 const char* label,
112 const char sep,
113 int csv_escape,
114 const char variable_prefix,
115 const char* eol_str,
116 size_t eol_str_len)
117 {
118 rasqal_query* query = rasqal_query_results_get_query(results);
119 int i;
120 int vars_count;
121 int emit_mkr;
122
123 if(!strcmp(label, (const char*)"mkr"))
124 emit_mkr = 1;
125 else
126 emit_mkr = 0;
127
128 if(!rasqal_query_results_is_bindings(results)) {
129 rasqal_log_error_simple(query->world, RAPTOR_LOG_LEVEL_ERROR,
130 &query->locator,
131 "Can only write %s format for variable binding results",
132 label);
133 return 1;
134 }
135
136 if(emit_mkr) {
137 raptor_iostream_counted_string_write("result is relation with format = csv;\n", 38, iostr);
138 raptor_iostream_counted_string_write("begin relation result;\n", 23, iostr);
139 }
140
141 /* Header */
142 for(i = 0; 1; i++) {
143 const unsigned char *name;
144
145 name = rasqal_query_results_get_binding_name(results, i);
146 if(!name)
147 break;
148
149 if(i > 0)
150 raptor_iostream_write_byte(sep, iostr);
151
152 if(variable_prefix)
153 raptor_iostream_write_byte(variable_prefix, iostr);
154 raptor_iostream_string_write(name, iostr);
155 }
156 if(emit_mkr)
157 raptor_iostream_counted_string_write(";", 1, iostr);
158 raptor_iostream_counted_string_write(eol_str, eol_str_len, iostr);
159
160
161 /* Variable Binding Results */
162 vars_count = rasqal_query_results_get_bindings_count(results);
163 while(!rasqal_query_results_finished(results)) {
164 /* Result row */
165 for(i = 0; i < vars_count; i++) {
166 rasqal_literal *l = rasqal_query_results_get_binding_value(results, i);
167
168 if(i > 0)
169 raptor_iostream_write_byte(sep, iostr);
170
171 if(l) {
172 const unsigned char* str;
173 size_t len;
174
175 switch(l->type) {
176 case RASQAL_LITERAL_URI:
177 str = RASQAL_GOOD_CAST(const unsigned char*, raptor_uri_as_counted_string(l->value.uri, &len));
178 if(csv_escape)
179 rasqal_iostream_write_csv_string(str, len, iostr);
180 else {
181 raptor_iostream_write_byte('<', iostr);
182 if(str && len > 0)
183 raptor_string_ntriples_write(str, len, '"', iostr);
184 raptor_iostream_write_byte('>', iostr);
185 }
186 break;
187
188 case RASQAL_LITERAL_BLANK:
189 raptor_bnodeid_ntriples_write(l->string, l->string_len, iostr);
190 break;
191
192 case RASQAL_LITERAL_STRING:
193 if(csv_escape) {
194 rasqal_iostream_write_csv_string(l->string, l->string_len, iostr);
195 } else {
196 if(l->datatype && l->valid) {
197 rasqal_literal_type ltype;
198 ltype = rasqal_xsd_datatype_uri_to_type(l->world, l->datatype);
199
200 if(ltype >= RASQAL_LITERAL_INTEGER &&
201 ltype <= RASQAL_LITERAL_DECIMAL) {
202 /* write integer, float, double and decimal XSD typed
203 * data without quotes, datatype or language
204 */
205 raptor_string_ntriples_write(l->string, l->string_len, '\0', iostr);
206 break;
207 }
208 }
209
210 raptor_iostream_write_byte('"', iostr);
211 raptor_string_ntriples_write(l->string, l->string_len, '"', iostr);
212 raptor_iostream_write_byte('"', iostr);
213
214 if(l->language) {
215 raptor_iostream_write_byte('@', iostr);
216 raptor_iostream_string_write(RASQAL_GOOD_CAST(const unsigned char*, l->language), iostr);
217 }
218
219 if(l->datatype) {
220 raptor_iostream_string_write("^^<", iostr);
221 str = RASQAL_GOOD_CAST(const unsigned char*, raptor_uri_as_counted_string(l->datatype, &len));
222 raptor_string_ntriples_write(str, len, '"', iostr);
223 raptor_iostream_write_byte('>', iostr);
224 }
225 }
226
227 break;
228
229 case RASQAL_LITERAL_PATTERN:
230 case RASQAL_LITERAL_QNAME:
231 case RASQAL_LITERAL_INTEGER:
232 case RASQAL_LITERAL_XSD_STRING:
233 case RASQAL_LITERAL_BOOLEAN:
234 case RASQAL_LITERAL_DOUBLE:
235 case RASQAL_LITERAL_FLOAT:
236 case RASQAL_LITERAL_VARIABLE:
237 case RASQAL_LITERAL_DECIMAL:
238 case RASQAL_LITERAL_DATE:
239 case RASQAL_LITERAL_DATETIME:
240 case RASQAL_LITERAL_UDT:
241 case RASQAL_LITERAL_INTEGER_SUBTYPE:
242
243 case RASQAL_LITERAL_UNKNOWN:
244 default:
245 rasqal_log_error_simple(query->world, RAPTOR_LOG_LEVEL_ERROR,
246 &query->locator,
247 "Cannot turn literal type %u into %s",
248 l->type, label);
249 }
250 }
251
252 /* End Binding */
253 }
254
255 /* End Result Row */
256 if(emit_mkr)
257 raptor_iostream_counted_string_write(";", 1, iostr);
258 raptor_iostream_counted_string_write(eol_str, eol_str_len, iostr);
259
260 rasqal_query_results_next(results);
261 }
262 if(emit_mkr)
263 raptor_iostream_counted_string_write("end relation result;\n", 21, iostr);
264
265 /* end sparql */
266 return 0;
267 }
268
269
270 static int
rasqal_query_results_write_csv(rasqal_query_results_formatter * formatter,raptor_iostream * iostr,rasqal_query_results * results,raptor_uri * base_uri)271 rasqal_query_results_write_csv(rasqal_query_results_formatter* formatter,
272 raptor_iostream *iostr,
273 rasqal_query_results* results,
274 raptor_uri *base_uri)
275 {
276 return rasqal_query_results_write_sv(iostr, results, base_uri,
277 "CSV", ',', 1, '\0',
278 "\r\n", 2);
279 }
280
281
282 static int
rasqal_query_results_write_mkr(rasqal_query_results_formatter * formatter,raptor_iostream * iostr,rasqal_query_results * results,raptor_uri * base_uri)283 rasqal_query_results_write_mkr(rasqal_query_results_formatter* formatter,
284 raptor_iostream *iostr,
285 rasqal_query_results* results,
286 raptor_uri *base_uri)
287 {
288 return rasqal_query_results_write_sv(iostr, results, base_uri,
289 "mkr", ',', 1, '\0',
290 "\n", 1);
291 }
292
293
294 static int
rasqal_query_results_write_tsv(rasqal_query_results_formatter * formatter,raptor_iostream * iostr,rasqal_query_results * results,raptor_uri * base_uri)295 rasqal_query_results_write_tsv(rasqal_query_results_formatter* formatter,
296 raptor_iostream *iostr,
297 rasqal_query_results* results,
298 raptor_uri *base_uri)
299 {
300 return rasqal_query_results_write_sv(iostr, results, base_uri,
301 "TSV", '\t', 0, '?',
302 "\n", 1);
303 }
304
305
306
307 typedef struct
308 {
309 rasqal_world* world;
310 rasqal_rowsource* rowsource;
311
312 int failed;
313
314 /* Input fields */
315 raptor_uri* base_uri;
316 raptor_iostream* iostr;
317
318 raptor_locator locator;
319
320 /* SV processing */
321 int emit_mkr; /* Non 0 for mKR relation */
322 char sep;
323 sv* t;
324 char buffer[FILE_READ_BUF_SIZE]; /* iostream read buffer */
325 int offset; /* current result row number */
326
327 /* Output fields */
328 raptor_sequence* results_sequence; /* saved result rows */
329
330 /* Variables table allocated for variables in the result set */
331 rasqal_variables_table* vars_table;
332 size_t variables_count;
333
334 unsigned int flags;
335
336 int data_is_turtle;
337 } rasqal_rowsource_sv_context;
338
339
340 static sv_status_t
rasqal_rowsource_sv_header_callback(sv * t,void * user_data,char ** fields,size_t * widths,size_t count)341 rasqal_rowsource_sv_header_callback(sv *t, void *user_data,
342 char** fields, size_t *widths,
343 size_t count)
344 {
345 rasqal_rowsource_sv_context* con;
346 unsigned i;
347
348 con = (rasqal_rowsource_sv_context*)user_data;
349
350 con->variables_count = count;
351
352 for(i = 0; i < count; i++) {
353 rasqal_variable *v;
354 char *p = fields[i];
355 size_t len = widths[i];
356
357 if(*p == '?') {
358 p++;
359 len--;
360 }
361
362 v = rasqal_variables_table_add2(con->vars_table,
363 RASQAL_VARIABLE_TYPE_NORMAL,
364 RASQAL_GOOD_CAST(const unsigned char*, p),
365 len, NULL);
366 if(v) {
367 rasqal_rowsource_add_variable(con->rowsource, v);
368 /* above function takes a reference to v */
369 rasqal_free_variable(v);
370 }
371 }
372
373 return SV_STATUS_OK;
374 }
375
376
377 static sv_status_t
rasqal_rowsource_sv_data_callback(sv * t,void * user_data,char ** fields,size_t * widths,size_t count)378 rasqal_rowsource_sv_data_callback(sv *t, void *user_data,
379 char** fields, size_t *widths,
380 size_t count)
381 {
382 rasqal_rowsource_sv_context* con;
383 rasqal_row* row;
384 unsigned i;
385
386 con = (rasqal_rowsource_sv_context*)user_data;
387
388 row = rasqal_new_row(con->rowsource);
389 if(!row)
390 goto fail;
391
392 RASQAL_DEBUG2("Made new row %d\n", con->offset);
393 con->offset++;
394
395 for(i = 0; i < count; i++) {
396 char* field = fields[i];
397 size_t field_len = widths[i];
398 rasqal_literal* l;
399
400 if(!field_len) {
401 /* missing */
402 l = NULL;
403 } else if(con->data_is_turtle) {
404 l = rasqal_new_literal_from_ntriples_counted_string(con->world,
405 RASQAL_GOOD_CAST(unsigned char*,field),
406 field_len);
407 if(!l)
408 goto fail;
409 } else {
410 unsigned char* lvalue;
411
412 lvalue = RASQAL_MALLOC(unsigned char*, field_len + 1);
413 if(!lvalue)
414 goto fail;
415
416 if(!widths[i])
417 *lvalue = '\0';
418 else
419 memcpy(lvalue, field, field_len + 1);
420
421 l = rasqal_new_string_literal_node(con->world, lvalue, NULL, NULL);
422 if(!l)
423 goto fail;
424 }
425
426 rasqal_row_set_value_at(row, RASQAL_GOOD_CAST(int, i), l);
427 if(l) {
428 RASQAL_DEBUG4("Saving row result %d %s value at offset %u\n",
429 con->offset, rasqal_literal_type_label(l->type), i);
430 rasqal_free_literal(l);
431 } else {
432 RASQAL_DEBUG3("Saving row result %d NULL value at offset %u\n",
433 con->offset, i);
434 }
435 }
436 raptor_sequence_push(con->results_sequence, row);
437
438 return SV_STATUS_OK;
439
440 fail:
441 rasqal_free_row(row);
442 return SV_STATUS_NO_MEMORY;
443 }
444
445
446 static int
rasqal_rowsource_sv_init(rasqal_rowsource * rowsource,void * user_data)447 rasqal_rowsource_sv_init(rasqal_rowsource* rowsource, void *user_data)
448 {
449 rasqal_rowsource_sv_context* con;
450
451 con = (rasqal_rowsource_sv_context*)user_data;
452
453 con->rowsource = rowsource;
454
455 con->t = sv_new(con,
456 rasqal_rowsource_sv_header_callback,
457 rasqal_rowsource_sv_data_callback,
458 con->sep);
459 if(!con->t)
460 return 1;
461
462 if(con->data_is_turtle)
463 sv_set_option(con->t, SV_OPTION_QUOTED_FIELDS, 0L);
464
465 return 0;
466 }
467
468
469 static int
rasqal_rowsource_sv_finish(rasqal_rowsource * rowsource,void * user_data)470 rasqal_rowsource_sv_finish(rasqal_rowsource* rowsource, void *user_data)
471 {
472 rasqal_rowsource_sv_context* con;
473
474 con = (rasqal_rowsource_sv_context*)user_data;
475
476 if(con->t)
477 sv_free(con->t);
478
479 if(con->base_uri)
480 raptor_free_uri(con->base_uri);
481
482 if(con->results_sequence)
483 raptor_free_sequence(con->results_sequence);
484
485 if(con->vars_table)
486 rasqal_free_variables_table(con->vars_table);
487
488 if(con->flags) {
489 if(con->iostr)
490 raptor_free_iostream(con->iostr);
491 }
492
493 RASQAL_FREE(rasqal_rowsource_sv_context, con);
494
495 return 0;
496 }
497
498
499 static void
rasqal_rowsource_sv_process(rasqal_rowsource_sv_context * con)500 rasqal_rowsource_sv_process(rasqal_rowsource_sv_context* con)
501 {
502 if(raptor_sequence_size(con->results_sequence) && con->variables_count > 0)
503 return;
504
505 /* do some parsing - need some results */
506 while(!raptor_iostream_read_eof(con->iostr)) {
507 size_t read_len;
508
509 read_len = RASQAL_BAD_CAST(size_t,
510 raptor_iostream_read_bytes(RASQAL_GOOD_CAST(char*, con->buffer), 1,
511 FILE_READ_BUF_SIZE,
512 con->iostr));
513 if(read_len > 0) {
514 sv_status_t status;
515
516 RASQAL_DEBUG2("processing %d bytes\n", RASQAL_GOOD_CAST(int, read_len));
517
518 status = sv_parse_chunk(con->t, con->buffer, read_len);
519 if(status != SV_STATUS_OK) {
520 con->failed++;
521 break;
522 }
523 }
524
525 if(read_len < FILE_READ_BUF_SIZE) {
526 /* finished */
527 break;
528 }
529
530 /* end with variables sequence done AND at least one row */
531 if(con->variables_count > 0 &&
532 raptor_sequence_size(con->results_sequence) > 0)
533 break;
534 }
535 }
536
537
538 static int
rasqal_rowsource_sv_ensure_variables(rasqal_rowsource * rowsource,void * user_data)539 rasqal_rowsource_sv_ensure_variables(rasqal_rowsource* rowsource,
540 void *user_data)
541 {
542 rasqal_rowsource_sv_context* con;
543
544 con = (rasqal_rowsource_sv_context*)user_data;
545
546 rasqal_rowsource_sv_process(con);
547
548 return con->failed;
549 }
550
551
552 static rasqal_row*
rasqal_rowsource_sv_read_row(rasqal_rowsource * rowsource,void * user_data)553 rasqal_rowsource_sv_read_row(rasqal_rowsource* rowsource,
554 void *user_data)
555 {
556 rasqal_rowsource_sv_context* con;
557 rasqal_row* row=NULL;
558
559 con = (rasqal_rowsource_sv_context*)user_data;
560
561 rasqal_rowsource_sv_process(con);
562
563 if(!con->failed && raptor_sequence_size(con->results_sequence) > 0) {
564 RASQAL_DEBUG1("getting row from stored sequence\n");
565 row=(rasqal_row*)raptor_sequence_unshift(con->results_sequence);
566 }
567
568 return row;
569 }
570
571
572
573
574
575 static const rasqal_rowsource_handler rasqal_rowsource_csv_handler={
576 /* .version = */ 1,
577 "CSV",
578 /* .init = */ rasqal_rowsource_sv_init,
579 /* .finish = */ rasqal_rowsource_sv_finish,
580 /* .ensure_variables = */ rasqal_rowsource_sv_ensure_variables,
581 /* .read_row = */ rasqal_rowsource_sv_read_row,
582 /* .read_all_rows = */ NULL,
583 /* .reset = */ NULL,
584 /* .set_requirements = */ NULL,
585 /* .get_inner_rowsource = */ NULL,
586 /* .set_origin = */ NULL,
587 };
588
589 static const rasqal_rowsource_handler rasqal_rowsource_mkr_handler={
590 /* .version = */ 1,
591 "mkr",
592 /* .init = */ rasqal_rowsource_sv_init,
593 /* .finish = */ rasqal_rowsource_sv_finish,
594 /* .ensure_variables = */ rasqal_rowsource_sv_ensure_variables,
595 /* .read_row = */ rasqal_rowsource_sv_read_row,
596 /* .read_all_rows = */ NULL,
597 /* .reset = */ NULL,
598 /* .set_requirements = */ NULL,
599 /* .get_inner_rowsource = */ NULL,
600 /* .set_origin = */ NULL,
601 };
602
603 static const rasqal_rowsource_handler rasqal_rowsource_tsv_handler={
604 /* .version = */ 1,
605 "TSV",
606 /* .init = */ rasqal_rowsource_sv_init,
607 /* .finish = */ rasqal_rowsource_sv_finish,
608 /* .ensure_variables = */ rasqal_rowsource_sv_ensure_variables,
609 /* .read_row = */ rasqal_rowsource_sv_read_row,
610 /* .read_all_rows = */ NULL,
611 /* .reset = */ NULL,
612 /* .set_requirements = */ NULL,
613 /* .get_inner_rowsource = */ NULL,
614 /* .set_origin = */ NULL,
615 };
616
617
618
619 /*
620 * rasqal_query_results_getrowsource_csv:
621 * @world: rasqal world object
622 * @iostr: #raptor_iostream to read the query results from
623 * @base_uri: #raptor_uri base URI of the input format
624 *
625 * INTERNAL - Read SPARQL CSV query results format from an iostream
626 * in a format returning a rowsource.
627 *
628 * Return value: a new rasqal_rowsource or NULL on failure
629 **/
630 static rasqal_rowsource*
rasqal_query_results_get_rowsource_csv(rasqal_query_results_formatter * formatter,rasqal_world * world,rasqal_variables_table * vars_table,raptor_iostream * iostr,raptor_uri * base_uri,unsigned int flags)631 rasqal_query_results_get_rowsource_csv(rasqal_query_results_formatter* formatter,
632 rasqal_world *world,
633 rasqal_variables_table* vars_table,
634 raptor_iostream *iostr,
635 raptor_uri *base_uri,
636 unsigned int flags)
637 {
638 rasqal_rowsource_sv_context* con;
639
640 con = RASQAL_CALLOC(rasqal_rowsource_sv_context*, 1, sizeof(*con));
641 if(!con)
642 return NULL;
643
644 con->world = world;
645 con->base_uri = base_uri ? raptor_uri_copy(base_uri) : NULL;
646 con->iostr = iostr;
647
648 con->locator.uri = base_uri;
649
650 con->flags = flags;
651
652 con->emit_mkr = 0;
653
654 con->results_sequence = raptor_new_sequence((raptor_data_free_handler)rasqal_free_row, (raptor_data_print_handler)rasqal_row_print);
655
656 con->vars_table = rasqal_new_variables_table_from_variables_table(vars_table);
657
658 con->sep = ',';
659
660 return rasqal_new_rowsource_from_handler(world, NULL,
661 con,
662 &rasqal_rowsource_csv_handler,
663 con->vars_table,
664 0);
665 }
666
667 /*
668 * rasqal_query_results_getrowsource_mkr:
669 * @world: rasqal world object
670 * @iostr: #raptor_iostream to read the query results from
671 * @base_uri: #raptor_uri base URI of the input format
672 *
673 * INTERNAL - Read SPARQL mKR query results format from an iostream
674 * in a format returning a rowsource.
675 *
676 * Return value: a new rasqal_rowsource or NULL on failure
677 **/
678 static rasqal_rowsource*
rasqal_query_results_get_rowsource_mkr(rasqal_query_results_formatter * formatter,rasqal_world * world,rasqal_variables_table * vars_table,raptor_iostream * iostr,raptor_uri * base_uri,unsigned int flags)679 rasqal_query_results_get_rowsource_mkr(rasqal_query_results_formatter* formatter,
680 rasqal_world *world,
681 rasqal_variables_table* vars_table,
682 raptor_iostream *iostr,
683 raptor_uri *base_uri,
684 unsigned int flags)
685 {
686 rasqal_rowsource_sv_context* con;
687
688 con = RASQAL_CALLOC(rasqal_rowsource_sv_context*, 1, sizeof(*con));
689 if(!con)
690 return NULL;
691
692 con->world = world;
693 con->base_uri = base_uri ? raptor_uri_copy(base_uri) : NULL;
694 con->iostr = iostr;
695
696 con->locator.uri = base_uri;
697
698 con->flags = flags;
699
700 con->emit_mkr = 1;
701
702 con->results_sequence = raptor_new_sequence((raptor_data_free_handler)rasqal_free_row, (raptor_data_print_handler)rasqal_row_print);
703
704 con->vars_table = rasqal_new_variables_table_from_variables_table(vars_table);
705
706 con->sep = ',';
707
708 return rasqal_new_rowsource_from_handler(world, NULL,
709 con,
710 &rasqal_rowsource_mkr_handler,
711 con->vars_table,
712 0);
713 }
714
715 /*
716 * rasqal_query_results_getrowsource_tsv:
717 * @world: rasqal world object
718 * @iostr: #raptor_iostream to read the query results from
719 * @base_uri: #raptor_uri base URI of the input format
720 *
721 * INTERNAL - Read SPARQL TSV query results format from an iostream
722 * in a format returning a rowsource.
723 *
724 * Return value: a new rasqal_rowsource or NULL on failure
725 **/
726 static rasqal_rowsource*
rasqal_query_results_get_rowsource_tsv(rasqal_query_results_formatter * formatter,rasqal_world * world,rasqal_variables_table * vars_table,raptor_iostream * iostr,raptor_uri * base_uri,unsigned int flags)727 rasqal_query_results_get_rowsource_tsv(rasqal_query_results_formatter* formatter,
728 rasqal_world *world,
729 rasqal_variables_table* vars_table,
730 raptor_iostream *iostr,
731 raptor_uri *base_uri,
732 unsigned int flags)
733 {
734 rasqal_rowsource_sv_context* con;
735
736 con = RASQAL_CALLOC(rasqal_rowsource_sv_context*, 1, sizeof(*con));
737 if(!con)
738 return NULL;
739
740 con->world = world;
741 con->base_uri = base_uri ? raptor_uri_copy(base_uri) : NULL;
742 con->iostr = iostr;
743
744 con->locator.uri = base_uri;
745
746 con->flags = flags;
747
748 con->emit_mkr = 0;
749
750 con->results_sequence = raptor_new_sequence((raptor_data_free_handler)rasqal_free_row, (raptor_data_print_handler)rasqal_row_print);
751
752 con->vars_table = rasqal_new_variables_table_from_variables_table(vars_table);
753
754 con->sep = '\t';
755 con->data_is_turtle = 1;
756
757 return rasqal_new_rowsource_from_handler(world, NULL,
758 con,
759 &rasqal_rowsource_tsv_handler,
760 con->vars_table,
761 0);
762 }
763
764
765
/*
 * rasqal_query_results_sv_score_first_line:
 * @p: buffer to examine (may be NULL)
 * @len: number of bytes available at @p
 * @sep: separator character to count
 * @min_count: number of separators needed for the base score
 * @boost_count: number of separators needed for the boosted score
 *
 * INTERNAL - Score a buffer by the number of @sep characters in its
 * first line.
 *
 * Scanning stops at the end of the buffer, a NUL, a carriage return or
 * a linefeed.  Reaching @min_count separators gives a base score of 6;
 * also reaching @boost_count raises it to 8.
 *
 * Return value: 0, 6 or 8
 */
static int
rasqal_query_results_sv_score_first_line(const unsigned char* p, size_t len,
                                         const char sep,
                                         unsigned int min_count,
                                         unsigned int boost_count)
{
  /* Compare bytes as unsigned char so a separator with the high bit set
   * still matches on platforms where plain char is signed.
   */
  const unsigned char sep_byte = (unsigned char)sep;
  unsigned int count = 0;
  int score = 0;

  if(!p || !len)
    return 0;

  for(; (len && *p && *p != '\r' && *p != '\n'); p++, len--) {
    if(*p != sep_byte)
      continue;

    count++;
    if(count < min_count)
      continue;

    score = 6;
    if(count >= boost_count) {
      score += 2;
      /* if the score is this high, we can end */
      break;
    }
  }

  return score;
}
800
801
802 static int
rasqal_query_results_csv_recognise_syntax(rasqal_query_results_format_factory * factory,const unsigned char * buffer,size_t len,const unsigned char * identifier,const unsigned char * suffix,const char * mime_type)803 rasqal_query_results_csv_recognise_syntax(rasqal_query_results_format_factory* factory,
804 const unsigned char *buffer,
805 size_t len,
806 const unsigned char *identifier,
807 const unsigned char *suffix,
808 const char *mime_type)
809 {
810 int score = 0;
811
812 if(suffix && !strcmp(RASQAL_GOOD_CAST(const char*, suffix), "csv"))
813 return 7;
814
815 if(buffer && len) {
816 /* use number of tabs in first line - comma needs higher counts since it
817 * is more likely to appear in text.
818 */
819 score = rasqal_query_results_sv_score_first_line(buffer, len, ',', 5, 7);
820 }
821
822 return score;
823 }
824
825 static int
rasqal_query_results_mkr_recognise_syntax(rasqal_query_results_format_factory * factory,const unsigned char * buffer,size_t len,const unsigned char * identifier,const unsigned char * suffix,const char * mime_type)826 rasqal_query_results_mkr_recognise_syntax(rasqal_query_results_format_factory* factory,
827 const unsigned char *buffer,
828 size_t len,
829 const unsigned char *identifier,
830 const unsigned char *suffix,
831 const char *mime_type)
832 {
833 unsigned int score = 0;
834
835 if(suffix && !strcmp(RASQAL_GOOD_CAST(const char*, suffix), "mkr"))
836 return 7;
837
838 if(buffer && len) {
839 /* use number of tabs in first line - comma needs higher counts since it
840 * is more likely to appear in text.
841 */
842 score = rasqal_query_results_sv_score_first_line(buffer, len, ',', 5, 7);
843 }
844
845 return score;
846 }
847
848
849 static int
rasqal_query_results_tsv_recognise_syntax(rasqal_query_results_format_factory * factory,const unsigned char * buffer,size_t len,const unsigned char * identifier,const unsigned char * suffix,const char * mime_type)850 rasqal_query_results_tsv_recognise_syntax(rasqal_query_results_format_factory* factory,
851 const unsigned char *buffer,
852 size_t len,
853 const unsigned char *identifier,
854 const unsigned char *suffix,
855 const char *mime_type)
856 {
857 int score = 0;
858
859 if(suffix && !strcmp(RASQAL_GOOD_CAST(const char*, suffix), "tsv"))
860 return 7;
861
862 if(buffer && len) {
863 /* use number of tabs in first line - tab is more rare so guess
864 * with fewer than csv's comma. */
865 score = rasqal_query_results_sv_score_first_line(buffer, len, '\t', 3, 5);
866 }
867
868 return score;
869 }
870
871
872
873
874
875 static const char* const csv_names[] = { "csv", NULL};
876
877 static const char* const csv_uri_strings[] = {
878 "http://www.w3.org/ns/formats/SPARQL_Results_CSV",
879 "http://www.w3.org/TR/sparql11-results-csv-tsv/",
880 "http://www.ietf.org/rfc/rfc4180.txt",
881 NULL
882 };
883
884 static const raptor_type_q csv_types[] = {
885 { "text/csv", 8, 10},
886 { "text/csv; header=present", 24, 10},
887 { NULL, 0, 0}
888 };
889
890 static int
rasqal_query_results_csv_register_factory(rasqal_query_results_format_factory * factory)891 rasqal_query_results_csv_register_factory(rasqal_query_results_format_factory *factory)
892 {
893 int rc = 0;
894
895 factory->desc.names = csv_names;
896 factory->desc.mime_types = csv_types;
897
898 factory->desc.label = "Comma Separated Values (CSV)";
899 factory->desc.uri_strings = csv_uri_strings;
900
901 factory->desc.flags = 0;
902
903 factory->write = rasqal_query_results_write_csv;
904 factory->get_rowsource = rasqal_query_results_get_rowsource_csv;
905 factory->recognise_syntax = rasqal_query_results_csv_recognise_syntax;
906
907 return rc;
908 }
909
910 static const char* const mkr_names[] = { "mkr", NULL};
911
912 static const char* const mkr_uri_strings[] = {
913 NULL
914 };
915
916 static const raptor_type_q mkr_types[] = {
917 { "text/mkr", 8, 10},
918 { "text/mkr; header=present", 24, 10},
919 { NULL, 0, 0}
920 };
921
922 static int
rasqal_query_results_mkr_register_factory(rasqal_query_results_format_factory * factory)923 rasqal_query_results_mkr_register_factory(rasqal_query_results_format_factory *factory)
924 {
925 int rc = 0;
926
927 factory->desc.names = mkr_names;
928 factory->desc.mime_types = mkr_types;
929
930 factory->desc.label = "mKR relation (mkr)";
931 factory->desc.uri_strings = mkr_uri_strings;
932
933 factory->desc.flags = 0;
934
935 factory->write = rasqal_query_results_write_mkr;
936 factory->get_rowsource = rasqal_query_results_get_rowsource_mkr;
937 factory->recognise_syntax = rasqal_query_results_mkr_recognise_syntax;
938
939 return rc;
940 }
941
942
943 static const char* const tsv_names[] = { "tsv", NULL};
944
945 static const char* const tsv_uri_strings[] = {
946 "http://www.w3.org/ns/formats/SPARQL_Results_TSV",
947 "http://www.w3.org/TR/sparql11-results-csv-tsv/",
948 "http://www.iana.org/assignments/media-types/text/tab-separated-values",
949 NULL
950 };
951
952
953 static const raptor_type_q tsv_types[] = {
954 { "text/tab-separated-values", 25, 10},
955 { NULL, 0, 0}
956 };
957
958
959 static int
rasqal_query_results_tsv_register_factory(rasqal_query_results_format_factory * factory)960 rasqal_query_results_tsv_register_factory(rasqal_query_results_format_factory *factory)
961 {
962 int rc = 0;
963
964 factory->desc.names = tsv_names;
965 factory->desc.mime_types = tsv_types;
966
967 factory->desc.label = "Tab Separated Values (TSV)";
968 factory->desc.uri_strings = tsv_uri_strings;
969
970 factory->desc.flags = 0;
971
972 factory->write = rasqal_query_results_write_tsv;
973 factory->get_rowsource = rasqal_query_results_get_rowsource_tsv;
974 factory->recognise_syntax = rasqal_query_results_tsv_recognise_syntax;
975
976 return rc;
977 }
978
979
980 int
rasqal_init_result_format_sv(rasqal_world * world)981 rasqal_init_result_format_sv(rasqal_world* world)
982 {
983 if(!rasqal_world_register_query_results_format_factory(world,
984 &rasqal_query_results_csv_register_factory))
985 return 1;
986
987 if(!rasqal_world_register_query_results_format_factory(world,
988 &rasqal_query_results_mkr_register_factory))
989 return 1;
990
991 if(!rasqal_world_register_query_results_format_factory(world,
992 &rasqal_query_results_tsv_register_factory))
993 return 1;
994
995 return 0;
996 }
997