1 /* -*- Mode: c; c-basic-offset: 2 -*-
2 *
3 * raptor_parse.c - Raptor Parser API
4 *
5 * Copyright (C) 2000-2010, David Beckett http://www.dajobe.org/
6 * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
7 *
8 * This package is Free Software and part of Redland http://librdf.org/
9 *
10 * It is licensed under the following three licenses as alternatives:
11 * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
12 * 2. GNU General Public License (GPL) V2 or any newer version
13 * 3. Apache License, V2.0 or any newer version
14 *
15 * You may not use this file except in compliance with at least one of
16 * the above three licenses.
17 *
18 * See LICENSE.html or LICENSE.txt at the top of this package for the
19 * complete terms and further detail along with the license texts for
20 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
21 *
22 *
23 */
24
25
26 #ifdef HAVE_CONFIG_H
27 #include <raptor_config.h>
28 #endif
29
30 #include <stdio.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include <stdarg.h>
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 #ifdef HAVE_STDLIB_H
38 #include <stdlib.h>
39 #endif
40 #ifdef HAVE_SYS_STAT_H
41 #include <sys/stat.h>
42 #endif
43 #ifdef HAVE_FCNTL_H
44 #include <fcntl.h>
45 #endif
46
47 /* Raptor includes */
48 #include "raptor2.h"
49 #include "raptor_internal.h"
50
51
52 #ifndef STANDALONE
53
54 /* prototypes for helper functions */
55 static void raptor_parser_set_strict(raptor_parser* rdf_parser, int is_strict);
56
57 /* helper methods */
58
59 static void
raptor_free_parser_factory(raptor_parser_factory * factory)60 raptor_free_parser_factory(raptor_parser_factory* factory)
61 {
62 RAPTOR_ASSERT_OBJECT_POINTER_RETURN(factory, raptor_parser_factory);
63
64 if(factory->finish_factory)
65 factory->finish_factory(factory);
66
67 RAPTOR_FREE(raptor_parser_factory, factory);
68 }
69
70
71 /* class methods */
72
73 int
raptor_parsers_init(raptor_world * world)74 raptor_parsers_init(raptor_world *world)
75 {
76 int rc = 0;
77
78 world->parsers = raptor_new_sequence((raptor_data_free_handler)raptor_free_parser_factory, NULL);
79 if(!world->parsers)
80 return 1;
81
82 #ifdef RAPTOR_PARSER_RDFXML
83 rc+= raptor_init_parser_rdfxml(world) != 0;
84 #endif
85
86 #ifdef RAPTOR_PARSER_NTRIPLES
87 rc+= raptor_init_parser_ntriples(world) != 0;
88 #endif
89
90 #ifdef RAPTOR_PARSER_N3
91 rc+= raptor_init_parser_n3(world) != 0;
92 #endif
93
94 #ifdef RAPTOR_PARSER_TURTLE
95 rc+= raptor_init_parser_turtle(world) != 0;
96 #endif
97
98 #ifdef RAPTOR_PARSER_TRIG
99 rc+= raptor_init_parser_trig(world) != 0;
100 #endif
101
102 #ifdef RAPTOR_PARSER_RSS
103 rc+= raptor_init_parser_rss(world) != 0;
104 #endif
105
106 #if defined(RAPTOR_PARSER_GRDDL)
107 rc+= raptor_init_parser_grddl_common(world) != 0;
108
109 #ifdef RAPTOR_PARSER_GRDDL
110 rc+= raptor_init_parser_grddl(world) != 0;
111 #endif
112
113 #endif
114
115 #ifdef RAPTOR_PARSER_GUESS
116 rc+= raptor_init_parser_guess(world) != 0;
117 #endif
118
119 #ifdef RAPTOR_PARSER_RDFA
120 rc+= raptor_init_parser_rdfa(world) != 0;
121 #endif
122
123 #ifdef RAPTOR_PARSER_JSON
124 rc+= raptor_init_parser_json(world) != 0;
125 #endif
126
127 #ifdef RAPTOR_PARSER_NQUADS
128 rc+= raptor_init_parser_nquads(world) != 0;
129 #endif
130
131 return rc;
132 }
133
134
135 /*
136 * raptor_finish_parsers - delete all the registered parsers
137 */
138 void
raptor_parsers_finish(raptor_world * world)139 raptor_parsers_finish(raptor_world *world)
140 {
141 if(world->parsers) {
142 raptor_free_sequence(world->parsers);
143 world->parsers = NULL;
144 }
145 #if defined(RAPTOR_PARSER_GRDDL)
146 raptor_terminate_parser_grddl_common(world);
147 #endif
148 }
149
150
151 /*
152 * raptor_world_register_parser_factory:
153 * @world: raptor world
154 * @factory: pointer to function to call to register the factory
155 *
156 * Internal - Register a parser via parser factory.
157 *
158 * All strings set in the @factory method are shared with the
159 * #raptor_parser_factory
160 *
161 * Return value: new factory object or NULL on failure
162 **/
163 RAPTOR_EXTERN_C
164 raptor_parser_factory*
raptor_world_register_parser_factory(raptor_world * world,int (* factory)(raptor_parser_factory *))165 raptor_world_register_parser_factory(raptor_world* world,
166 int (*factory) (raptor_parser_factory*))
167 {
168 raptor_parser_factory *parser = NULL;
169
170 parser = RAPTOR_CALLOC(raptor_parser_factory*, 1, sizeof(*parser));
171 if(!parser)
172 return NULL;
173
174 parser->world = world;
175
176 parser->desc.mime_types = NULL;
177
178 if(raptor_sequence_push(world->parsers, parser))
179 return NULL; /* on error, parser is already freed by the sequence */
180
181 /* Call the parser registration function on the new object */
182 if(factory(parser))
183 return NULL; /* parser is owned and freed by the parsers sequence */
184
185 if(raptor_syntax_description_validate(&parser->desc)) {
186 raptor_log_error(world, RAPTOR_LOG_LEVEL_ERROR, NULL,
187 "Parser description failed to validate\n");
188 goto tidy;
189 }
190
191
192
193 #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
194 RAPTOR_DEBUG2("Registered parser %s\n", parser->desc.names[0]);
195 #endif
196
197 return parser;
198
199 /* Clean up on failure */
200 tidy:
201 raptor_free_parser_factory(parser);
202 return NULL;
203 }
204
205
206 /*
207 * raptor_world_get_parser_factory:
208 * @world: world object
209 * @name: the factory name or NULL for the default factory
210 *
211 * INTERNAL - Get a parser factory by name.
212 *
213 * Return value: the factory object or NULL if there is no such factory
214 **/
215 raptor_parser_factory*
raptor_world_get_parser_factory(raptor_world * world,const char * name)216 raptor_world_get_parser_factory(raptor_world *world, const char *name)
217 {
218 raptor_parser_factory *factory = NULL;
219
220 /* return 1st parser if no particular one wanted - why? */
221 if(!name) {
222 factory = (raptor_parser_factory *)raptor_sequence_get_at(world->parsers, 0);
223 if(!factory) {
224 RAPTOR_DEBUG1("No (default) parsers registered\n");
225 return NULL;
226 }
227 } else {
228 int i;
229
230 for(i = 0;
231 (factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
232 i++) {
233 int namei;
234 const char* fname;
235
236 for(namei = 0; (fname = factory->desc.names[namei]); namei++) {
237 if(!strcmp(fname, name))
238 break;
239 }
240 if(fname)
241 break;
242 }
243 }
244
245 return factory;
246 }
247
248
249 /**
250 * raptor_world_get_parsers_count:
251 * @world: world object
252 *
253 * Get number of parsers
254 *
255 * Return value: number of parsers
256 **/
257 int
raptor_world_get_parsers_count(raptor_world * world)258 raptor_world_get_parsers_count(raptor_world* world)
259 {
260 RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, NULL);
261
262 raptor_world_open(world);
263
264 return raptor_sequence_size(world->parsers);
265 }
266
267
268 /**
269 * raptor_world_get_parser_description:
270 * @world: world object
271 * @counter: index into the list of parsers
272 *
273 * Get parser descriptive syntax information
274 *
275 * Return value: description or NULL if counter is out of range
276 **/
277 const raptor_syntax_description*
raptor_world_get_parser_description(raptor_world * world,unsigned int counter)278 raptor_world_get_parser_description(raptor_world* world,
279 unsigned int counter)
280 {
281 raptor_parser_factory *factory;
282
283 RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, NULL);
284
285 raptor_world_open(world);
286
287 factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers,
288 counter);
289
290 if(!factory)
291 return NULL;
292
293 return &factory->desc;
294 }
295
296
297 /**
298 * raptor_world_is_parser_name:
299 * @world: world object
300 * @name: the syntax name
301 *
302 * Check the name of a parser is known.
303 *
304 * Return value: non 0 if name is a known syntax name
305 */
306 int
raptor_world_is_parser_name(raptor_world * world,const char * name)307 raptor_world_is_parser_name(raptor_world* world, const char *name)
308 {
309 if(!name)
310 return 0;
311
312 RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, 0);
313
314 raptor_world_open(world);
315
316 return (raptor_world_get_parser_factory(world, name) != NULL);
317 }
318
319
320 /**
321 * raptor_new_parser:
322 * @world: world object
323 * @name: the parser name or NULL for default parser
324 *
325 * Constructor - create a new raptor_parser object.
326 *
327 * Return value: a new #raptor_parser object or NULL on failure
328 */
329 raptor_parser*
raptor_new_parser(raptor_world * world,const char * name)330 raptor_new_parser(raptor_world* world, const char *name)
331 {
332 raptor_parser_factory* factory;
333 raptor_parser* rdf_parser;
334
335 RAPTOR_CHECK_CONSTRUCTOR_WORLD(world);
336
337 raptor_world_open(world);
338
339 factory = raptor_world_get_parser_factory(world, name);
340 if(!factory)
341 return NULL;
342
343 rdf_parser = RAPTOR_CALLOC(raptor_parser*, 1, sizeof(*rdf_parser));
344 if(!rdf_parser)
345 return NULL;
346
347 rdf_parser->world = world;
348 raptor_statement_init(&rdf_parser->statement, world);
349
350 rdf_parser->context = RAPTOR_CALLOC(void*, 1, factory->context_length);
351 if(!rdf_parser->context) {
352 raptor_free_parser(rdf_parser);
353 return NULL;
354 }
355
356 #ifdef RAPTOR_XML_LIBXML
357 rdf_parser->magic = RAPTOR_LIBXML_MAGIC;
358 #endif
359 rdf_parser->factory = factory;
360
361 /* Bit flags */
362 rdf_parser->failed = 0;
363 rdf_parser->emit_graph_marks = 1;
364 rdf_parser->emitted_default_graph = 0;
365
366 raptor_object_options_init(&rdf_parser->options, RAPTOR_OPTION_AREA_PARSER);
367
368 /* set parsing strictness from default value */
369 raptor_parser_set_strict(rdf_parser,
370 RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_STRICT));
371
372 if(factory->init(rdf_parser, name)) {
373 raptor_free_parser(rdf_parser);
374 return NULL;
375 }
376
377 return rdf_parser;
378 }
379
380
381 /**
382 * raptor_new_parser_for_content:
383 * @world: world object
384 * @uri: URI identifying the syntax (or NULL)
385 * @mime_type: mime type identifying the content (or NULL)
386 * @buffer: buffer of content to guess (or NULL)
387 * @len: length of buffer
388 * @identifier: identifier of content (or NULL)
389 *
390 * Constructor - create a new raptor_parser.
391 *
392 * Uses raptor_world_guess_parser_name() to find a parser by scoring
393 * recognition of the syntax by a block of characters, the content
394 * identifier or a mime type. The content identifier is typically a
395 * filename or URI or some other identifier.
396 *
397 * Return value: a new #raptor_parser object or NULL on failure
398 **/
399 raptor_parser*
raptor_new_parser_for_content(raptor_world * world,raptor_uri * uri,const char * mime_type,const unsigned char * buffer,size_t len,const unsigned char * identifier)400 raptor_new_parser_for_content(raptor_world* world,
401 raptor_uri *uri, const char *mime_type,
402 const unsigned char *buffer, size_t len,
403 const unsigned char *identifier)
404 {
405 const char* name;
406
407 RAPTOR_CHECK_CONSTRUCTOR_WORLD(world);
408
409 raptor_world_open(world);
410
411 name = raptor_world_guess_parser_name(world, uri, mime_type,
412 buffer, len, identifier);
413 return name ? raptor_new_parser(world, name) : NULL;
414 }
415
416
417 /**
418 * raptor_parser_parse_start:
419 * @rdf_parser: RDF parser
420 * @uri: base URI or may be NULL if no base URI is required
421 *
422 * Start a parse of content with base URI.
423 *
424 * Parsers that need a base URI can be identified using a syntax
425 * description returned by raptor_world_get_parser_description()
426 * statically or raptor_parser_get_description() on a constructed
427 * parser.
428 *
429 * Return value: non-0 on failure, <0 if a required base URI was missing
430 **/
431 int
raptor_parser_parse_start(raptor_parser * rdf_parser,raptor_uri * uri)432 raptor_parser_parse_start(raptor_parser *rdf_parser, raptor_uri *uri)
433 {
434 if((rdf_parser->factory->desc.flags & RAPTOR_SYNTAX_NEED_BASE_URI) && !uri) {
435 raptor_parser_error(rdf_parser, "Missing base URI for %s parser.",
436 rdf_parser->factory->desc.names[0]);
437 return -1;
438 }
439
440 if(uri)
441 uri = raptor_uri_copy(uri);
442
443 if(rdf_parser->base_uri)
444 raptor_free_uri(rdf_parser->base_uri);
445 rdf_parser->base_uri = uri;
446
447 rdf_parser->locator.uri = uri;
448 rdf_parser->locator.line = -1;
449 rdf_parser->locator.column = -1;
450 rdf_parser->locator.byte = -1;
451
452 if(rdf_parser->factory->start)
453 return rdf_parser->factory->start(rdf_parser);
454 else
455 return 0;
456 }
457
458
459
460
461 /**
462 * raptor_parser_parse_chunk:
463 * @rdf_parser: RDF parser
464 * @buffer: content to parse
465 * @len: length of buffer
466 * @is_end: non-0 if this is the end of the content (such as EOF)
467 *
468 * Parse a block of content into triples.
469 *
470 * This method can only be called after raptor_parser_parse_start() has
471 * initialised the parser.
472 *
473 * Return value: non-0 on failure.
474 **/
475 int
raptor_parser_parse_chunk(raptor_parser * rdf_parser,const unsigned char * buffer,size_t len,int is_end)476 raptor_parser_parse_chunk(raptor_parser* rdf_parser,
477 const unsigned char *buffer, size_t len, int is_end)
478 {
479 if(rdf_parser->sb)
480 raptor_stringbuffer_append_counted_string(rdf_parser->sb, buffer, len, 1);
481
482 return rdf_parser->factory->chunk(rdf_parser, buffer, len, is_end);
483 }
484
485
486 /**
487 * raptor_free_parser:
488 * @parser: #raptor_parser object
489 *
490 * Destructor - destroy a raptor_parser object.
491 *
492 **/
493 void
raptor_free_parser(raptor_parser * rdf_parser)494 raptor_free_parser(raptor_parser* rdf_parser)
495 {
496 if(!rdf_parser)
497 return;
498
499 if(rdf_parser->factory)
500 rdf_parser->factory->terminate(rdf_parser);
501
502 if(rdf_parser->www)
503 raptor_free_www(rdf_parser->www);
504
505 if(rdf_parser->context)
506 RAPTOR_FREE(raptor_parser_context, rdf_parser->context);
507
508 if(rdf_parser->base_uri)
509 raptor_free_uri(rdf_parser->base_uri);
510
511 if(rdf_parser->sb)
512 raptor_free_stringbuffer(rdf_parser->sb);
513
514 raptor_object_options_clear(&rdf_parser->options);
515
516 RAPTOR_FREE(raptor_parser, rdf_parser);
517 }
518
519
520 /**
521 * raptor_parser_parse_file_stream:
522 * @rdf_parser: parser
523 * @stream: FILE* of RDF content
524 * @filename: filename of content or NULL if it has no name
525 * @base_uri: the base URI to use
526 *
527 * Parse RDF content from a FILE*.
528 *
529 * After draining the FILE* stream (EOF), fclose is not called on it.
530 *
531 * Return value: non 0 on failure
532 **/
533 int
raptor_parser_parse_file_stream(raptor_parser * rdf_parser,FILE * stream,const char * filename,raptor_uri * base_uri)534 raptor_parser_parse_file_stream(raptor_parser* rdf_parser,
535 FILE *stream, const char* filename,
536 raptor_uri *base_uri)
537 {
538 int rc = 0;
539 raptor_locator *locator = &rdf_parser->locator;
540
541 if(!stream || !base_uri)
542 return 1;
543
544 locator->line= locator->column = -1;
545 locator->file= filename;
546
547 if(raptor_parser_parse_start(rdf_parser, base_uri))
548 return 1;
549
550 while(!feof(stream)) {
551 size_t len = fread(rdf_parser->buffer, 1, RAPTOR_READ_BUFFER_SIZE, stream);
552 int is_end = (len < RAPTOR_READ_BUFFER_SIZE);
553 rdf_parser->buffer[len] = '\0';
554 rc = raptor_parser_parse_chunk(rdf_parser, rdf_parser->buffer, len, is_end);
555 if(rc || is_end)
556 break;
557 }
558
559 return (rc != 0);
560 }
561
562
563 /**
564 * raptor_parser_parse_file:
565 * @rdf_parser: parser
566 * @uri: URI of RDF content or NULL to read from standard input
567 * @base_uri: the base URI to use (or NULL if the same)
568 *
569 * Parse RDF content at a file URI.
570 *
571 * If @uri is NULL (source is stdin), then the @base_uri is required.
572 *
573 * Return value: non 0 on failure
574 **/
575 int
raptor_parser_parse_file(raptor_parser * rdf_parser,raptor_uri * uri,raptor_uri * base_uri)576 raptor_parser_parse_file(raptor_parser* rdf_parser, raptor_uri *uri,
577 raptor_uri *base_uri)
578 {
579 int rc = 0;
580 int free_base_uri = 0;
581 const char *filename = NULL;
582 FILE *fh = NULL;
583 #if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H)
584 struct stat buf;
585 #endif
586
587 if(uri) {
588 filename = raptor_uri_uri_string_to_filename(raptor_uri_as_string(uri));
589 if(!filename)
590 return 1;
591
592 #if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H)
593 if(!stat(filename, &buf) && S_ISDIR(buf.st_mode)) {
594 raptor_parser_error(rdf_parser, "Cannot read from a directory '%s'",
595 filename);
596 goto cleanup;
597 }
598 #endif
599
600 fh = fopen(filename, "r");
601 if(!fh) {
602 raptor_parser_error(rdf_parser, "file '%s' open failed - %s",
603 filename, strerror(errno));
604 goto cleanup;
605 }
606 if(!base_uri) {
607 base_uri = raptor_uri_copy(uri);
608 free_base_uri = 1;
609 }
610 } else {
611 if(!base_uri)
612 return 1;
613 fh = stdin;
614 }
615
616 rc = raptor_parser_parse_file_stream(rdf_parser, fh, filename, base_uri);
617
618 cleanup:
619 if(uri) {
620 if(fh)
621 fclose(fh);
622 RAPTOR_FREE(char*, filename);
623 }
624 if(free_base_uri)
625 raptor_free_uri(base_uri);
626
627 return rc;
628 }
629
630
631 void
raptor_parser_parse_uri_write_bytes(raptor_www * www,void * userdata,const void * ptr,size_t size,size_t nmemb)632 raptor_parser_parse_uri_write_bytes(raptor_www* www,
633 void *userdata, const void *ptr,
634 size_t size, size_t nmemb)
635 {
636 raptor_parse_bytes_context* rpbc = (raptor_parse_bytes_context*)userdata;
637 size_t len = size * nmemb;
638
639 if(!rpbc->started) {
640 raptor_uri* base_uri = rpbc->base_uri;
641
642 if(!base_uri) {
643 rpbc->final_uri = raptor_www_get_final_uri(www);
644 /* base URI after URI resolution is finally chosen */
645 base_uri = rpbc->final_uri ? rpbc->final_uri : www->uri;
646 }
647
648 if(raptor_parser_parse_start(rpbc->rdf_parser, base_uri))
649 raptor_www_abort(www, "Parsing failed");
650 rpbc->started = 1;
651 }
652
653 if(raptor_parser_parse_chunk(rpbc->rdf_parser, (unsigned char*)ptr, len, 0))
654 raptor_www_abort(www, "Parsing failed");
655 }
656
657
658 static void
raptor_parser_parse_uri_content_type_handler(raptor_www * www,void * userdata,const char * content_type)659 raptor_parser_parse_uri_content_type_handler(raptor_www* www, void* userdata,
660 const char* content_type)
661 {
662 raptor_parser* rdf_parser = (raptor_parser*)userdata;
663 if(rdf_parser->factory->content_type_handler)
664 rdf_parser->factory->content_type_handler(rdf_parser, content_type);
665 }
666
667
668 int
raptor_parser_set_uri_filter_no_net(void * user_data,raptor_uri * uri)669 raptor_parser_set_uri_filter_no_net(void *user_data, raptor_uri* uri)
670 {
671 unsigned char* uri_string = raptor_uri_as_string(uri);
672
673 if(raptor_uri_uri_string_is_file_uri(uri_string))
674 return 0;
675
676 raptor_parser_error((raptor_parser*)user_data,
677 "Network fetch of URI '%s' denied", uri_string);
678 return 1;
679 }
680
681
682 /**
683 * raptor_parser_parse_uri:
684 * @rdf_parser: parser
685 * @uri: URI of RDF content
686 * @base_uri: the base URI to use (or NULL if the same)
687 *
688 * Parse the RDF content at URI.
689 *
690 * Sends an HTTP Accept: header whent the URI is of the HTTP protocol,
691 * see raptor_parser_parse_uri_with_connection() for details including
692 * how the @base_uri is used.
693 *
694 * Return value: non 0 on failure
695 **/
696 int
raptor_parser_parse_uri(raptor_parser * rdf_parser,raptor_uri * uri,raptor_uri * base_uri)697 raptor_parser_parse_uri(raptor_parser* rdf_parser, raptor_uri *uri,
698 raptor_uri *base_uri)
699 {
700 return raptor_parser_parse_uri_with_connection(rdf_parser, uri, base_uri,
701 NULL);
702 }
703
704
705 /**
706 * raptor_parser_parse_uri_with_connection:
707 * @rdf_parser: parser
708 * @uri: URI of RDF content
709 * @base_uri: the base URI to use (or NULL if the same)
710 * @connection: connection object pointer or NULL to create a new one
711 *
712 * Parse RDF content at URI using existing WWW connection.
713 *
714 * If @base_uri is not given and during resolution of the URI, a
715 * protocol redirection occurs, the final resolved URI will be
716 * used as the base URI. If redirection does not occur, the
717 * base URI will be @uri.
718 *
719 * If @base_uri is given, it overrides the process above.
720 *
721 * When @connection is NULL and a MIME Type exists for the parser
722 * type, this type is sent in an HTTP Accept: header in the form
723 * Accept: MIME-TYPE along with a wildcard of 0.1 quality, so MIME-TYPE is
724 * prefered rather than the sole answer. The latter part may not be
725 * necessary but should ensure an HTTP 200 response.
726 *
727 * Return value: non 0 on failure
728 **/
729 int
raptor_parser_parse_uri_with_connection(raptor_parser * rdf_parser,raptor_uri * uri,raptor_uri * base_uri,void * connection)730 raptor_parser_parse_uri_with_connection(raptor_parser* rdf_parser,
731 raptor_uri *uri,
732 raptor_uri *base_uri, void *connection)
733 {
734 int ret = 0;
735 raptor_parse_bytes_context rpbc;
736 char* ua = NULL;
737 char* cert_filename = NULL;
738 char* cert_type = NULL;
739 char* cert_passphrase = NULL;
740 int ssl_verify_peer;
741 int ssl_verify_host;
742
743 if(connection) {
744 if(rdf_parser->www)
745 raptor_free_www(rdf_parser->www);
746 rdf_parser->www = raptor_new_www_with_connection(rdf_parser->world,
747 connection);
748 if(!rdf_parser->www)
749 return 1;
750 } else {
751 const char *accept_h;
752
753 if(rdf_parser->www)
754 raptor_free_www(rdf_parser->www);
755 rdf_parser->www = raptor_new_www(rdf_parser->world);
756 if(!rdf_parser->www)
757 return 1;
758
759 accept_h = raptor_parser_get_accept_header(rdf_parser);
760 if(accept_h) {
761 raptor_www_set_http_accept(rdf_parser->www, accept_h);
762 RAPTOR_FREE(char*, accept_h);
763 }
764 }
765
766 rpbc.rdf_parser = rdf_parser;
767 rpbc.base_uri = base_uri;
768 rpbc.final_uri = NULL;
769 rpbc.started = 0;
770
771 if(rdf_parser->uri_filter)
772 raptor_www_set_uri_filter(rdf_parser->www, rdf_parser->uri_filter,
773 rdf_parser->uri_filter_user_data);
774 else if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET))
775 raptor_www_set_uri_filter(rdf_parser->www,
776 raptor_parser_set_uri_filter_no_net, rdf_parser);
777
778 raptor_www_set_write_bytes_handler(rdf_parser->www,
779 raptor_parser_parse_uri_write_bytes,
780 &rpbc);
781
782 raptor_www_set_content_type_handler(rdf_parser->www,
783 raptor_parser_parse_uri_content_type_handler,
784 rdf_parser);
785
786 raptor_www_set_http_cache_control(rdf_parser->www,
787 RAPTOR_OPTIONS_GET_STRING(rdf_parser,
788 RAPTOR_OPTION_WWW_HTTP_CACHE_CONTROL));
789
790 ua = RAPTOR_OPTIONS_GET_STRING(rdf_parser, RAPTOR_OPTION_WWW_HTTP_USER_AGENT);
791 if(ua)
792 raptor_www_set_user_agent(rdf_parser->www, ua);
793
794 cert_filename = RAPTOR_OPTIONS_GET_STRING(rdf_parser,
795 RAPTOR_OPTION_WWW_CERT_FILENAME);
796 cert_type = RAPTOR_OPTIONS_GET_STRING(rdf_parser,
797 RAPTOR_OPTION_WWW_CERT_TYPE);
798 cert_passphrase = RAPTOR_OPTIONS_GET_STRING(rdf_parser,
799 RAPTOR_OPTION_WWW_CERT_PASSPHRASE);
800 if(cert_filename || cert_type || cert_passphrase)
801 raptor_www_set_ssl_cert_options(rdf_parser->www, cert_filename,
802 cert_type, cert_passphrase);
803
804 ssl_verify_peer = RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser,
805 RAPTOR_OPTION_WWW_SSL_VERIFY_PEER);
806 ssl_verify_host = RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser,
807 RAPTOR_OPTION_WWW_SSL_VERIFY_HOST);
808 raptor_www_set_ssl_verify_options(rdf_parser->www, ssl_verify_peer,
809 ssl_verify_host);
810
811 ret = raptor_www_fetch(rdf_parser->www, uri);
812
813 if(!rpbc.started && !ret)
814 ret = raptor_parser_parse_start(rdf_parser, base_uri);
815
816 if(rpbc.final_uri)
817 raptor_free_uri(rpbc.final_uri);
818
819 if(ret) {
820 raptor_free_www(rdf_parser->www);
821 rdf_parser->www = NULL;
822 return 1;
823 }
824
825 if(raptor_parser_parse_chunk(rdf_parser, NULL, 0, 1))
826 rdf_parser->failed = 1;
827
828 raptor_free_www(rdf_parser->www);
829 rdf_parser->www = NULL;
830
831 return rdf_parser->failed;
832 }
833
834
835 /*
836 * raptor_parser_fatal_error - Fatal Error from a parser - Internal
837 */
838 void
raptor_parser_fatal_error(raptor_parser * parser,const char * message,...)839 raptor_parser_fatal_error(raptor_parser* parser, const char *message, ...)
840 {
841 va_list arguments;
842
843 va_start(arguments, message);
844 if(parser) {
845 parser->failed = 1;
846 raptor_log_error_varargs(parser->world,
847 RAPTOR_LOG_LEVEL_FATAL,
848 &parser->locator,
849 message, arguments);
850 } else
851 raptor_log_error_varargs(NULL,
852 RAPTOR_LOG_LEVEL_FATAL, NULL,
853 message, arguments);
854 va_end(arguments);
855 }
856
857
858 /*
859 * raptor_parser_error - Error from a parser - Internal
860 */
861 void
raptor_parser_error(raptor_parser * parser,const char * message,...)862 raptor_parser_error(raptor_parser* parser, const char *message, ...)
863 {
864 va_list arguments;
865
866 va_start(arguments, message);
867
868 raptor_parser_log_error_varargs(parser, RAPTOR_LOG_LEVEL_ERROR,
869 message, arguments);
870
871 va_end(arguments);
872 }
873
874
875 /**
876 * raptor_parser_log_error_varargs:
877 * @parser: parser (or NULL)
878 * @level: log level
879 * @message: error format message
880 * @arguments: varargs for message
881 *
882 * Error from a parser - Internal.
883 */
884 void
raptor_parser_log_error_varargs(raptor_parser * parser,raptor_log_level level,const char * message,va_list arguments)885 raptor_parser_log_error_varargs(raptor_parser* parser,
886 raptor_log_level level,
887 const char *message, va_list arguments)
888 {
889 if(parser)
890 raptor_log_error_varargs(parser->world,
891 level,
892 &parser->locator,
893 message, arguments);
894 else
895 raptor_log_error_varargs(NULL,
896 level,
897 NULL,
898 message, arguments);
899 }
900
901
902 /*
903 * raptor_parser_warning - Warning from a parser - Internal
904 */
905 void
raptor_parser_warning(raptor_parser * parser,const char * message,...)906 raptor_parser_warning(raptor_parser* parser, const char *message, ...)
907 {
908 va_list arguments;
909
910 va_start(arguments, message);
911
912 if(parser)
913 raptor_log_error_varargs(parser->world,
914 RAPTOR_LOG_LEVEL_WARN,
915 &parser->locator,
916 message, arguments);
917 else
918 raptor_log_error_varargs(NULL,
919 RAPTOR_LOG_LEVEL_WARN,
920 NULL,
921 message, arguments);
922
923 va_end(arguments);
924 }
925
926
927
928 /* PUBLIC FUNCTIONS */
929
930 /**
931 * raptor_parser_set_statement_handler:
932 * @parser: #raptor_parser parser object
933 * @user_data: user data pointer for callback
934 * @handler: new statement callback function
935 *
936 * Set the statement handler function for the parser.
937 *
938 * Use this to set the function to receive statements as the parsing
939 * proceeds. The statement argument to @handler is shared and must be
940 * copied by the caller with raptor_statement_copy().
941 **/
942 void
raptor_parser_set_statement_handler(raptor_parser * parser,void * user_data,raptor_statement_handler handler)943 raptor_parser_set_statement_handler(raptor_parser* parser,
944 void *user_data,
945 raptor_statement_handler handler)
946 {
947 parser->user_data = user_data;
948 parser->statement_handler = handler;
949 }
950
951
952 /**
953 * raptor_parser_set_graph_mark_handler:
954 * @parser: #raptor_parser parser object
955 * @user_data: user data pointer for callback
956 * @handler: new graph callback function
957 *
958 * Set the graph mark handler function for the parser.
959 *
960 * See #raptor_graph_mark_handler and #raptor_graph_mark_flags for
961 * the marks that may be returned by the handler.
962 *
963 **/
964 void
raptor_parser_set_graph_mark_handler(raptor_parser * parser,void * user_data,raptor_graph_mark_handler handler)965 raptor_parser_set_graph_mark_handler(raptor_parser* parser,
966 void *user_data,
967 raptor_graph_mark_handler handler)
968 {
969 parser->user_data = user_data;
970 parser->graph_mark_handler = handler;
971 }
972
973
974 /**
975 * raptor_parser_set_namespace_handler:
976 * @parser: #raptor_parser parser object
977 * @user_data: user data pointer for callback
978 * @handler: new namespace callback function
979 *
980 * Set the namespace handler function for the parser.
981 *
982 * When a prefix/namespace is seen in a parser, call the given
983 * @handler with the prefix string and the #raptor_uri namespace URI.
984 * Either can be NULL for the default prefix or default namespace.
985 *
986 * The handler function does not deal with duplicates so any
987 * namespace may be declared multiple times.
988 *
989 **/
990 void
raptor_parser_set_namespace_handler(raptor_parser * parser,void * user_data,raptor_namespace_handler handler)991 raptor_parser_set_namespace_handler(raptor_parser* parser,
992 void *user_data,
993 raptor_namespace_handler handler)
994 {
995 parser->namespace_handler = handler;
996 parser->namespace_handler_user_data = user_data;
997 }
998
999
1000 /**
1001 * raptor_parser_set_uri_filter:
1002 * @parser: parser object
1003 * @filter: URI filter function
1004 * @user_data: User data to pass to filter function
1005 *
1006 * Set URI filter function for WWW retrieval.
1007 **/
1008 void
raptor_parser_set_uri_filter(raptor_parser * parser,raptor_uri_filter_func filter,void * user_data)1009 raptor_parser_set_uri_filter(raptor_parser* parser,
1010 raptor_uri_filter_func filter,
1011 void *user_data)
1012 {
1013 parser->uri_filter = filter;
1014 parser->uri_filter_user_data = user_data;
1015 }
1016
1017
1018 /**
1019 * raptor_parser_set_option:
1020 * @parser: #raptor_parser parser object
1021 * @option: option to set from enumerated #raptor_option values
1022 * @string: string option value (or NULL)
1023 * @integer: integer option value
1024 *
1025 * Set parser option.
1026 *
1027 * If @string is not NULL and the option type is numeric, the string
1028 * value is converted to an integer and used in preference to @integer.
1029 *
1030 * If @string is NULL and the option type is not numeric, an error is
1031 * returned.
1032 *
1033 * The @string values used are copied.
1034 *
1035 * The allowed options are available via
1036 * raptor_world_get_option_description().
1037 *
1038 * Return value: non 0 on failure or if the option is unknown
1039 **/
1040 int
raptor_parser_set_option(raptor_parser * parser,raptor_option option,const char * string,int integer)1041 raptor_parser_set_option(raptor_parser *parser, raptor_option option,
1042 const char* string, int integer)
1043 {
1044 int rc;
1045
1046 rc = raptor_object_options_set_option(&parser->options, option,
1047 string, integer);
1048 if(option == RAPTOR_OPTION_STRICT && !rc) {
1049 int is_strict = RAPTOR_OPTIONS_GET_NUMERIC(parser, RAPTOR_OPTION_STRICT);
1050 raptor_parser_set_strict(parser, is_strict);
1051 }
1052
1053 return rc;
1054 }
1055
1056
1057 /**
1058 * raptor_parser_get_option:
1059 * @parser: #raptor_parser parser object
1060 * @option: option to get value
1061 * @string_p: pointer to where to store string value
1062 * @integer_p: pointer to where to store integer value
1063 *
1064 * Get parser option.
1065 *
1066 * Any string value returned in *@string_p is shared and must
1067 * be copied by the caller.
1068 *
1069 * The allowed options are available via
1070 * raptor_world_get_option_description().
1071 *
1072 * Return value: option value or < 0 for an illegal option
1073 **/
1074 int
raptor_parser_get_option(raptor_parser * parser,raptor_option option,char ** string_p,int * integer_p)1075 raptor_parser_get_option(raptor_parser *parser, raptor_option option,
1076 char** string_p, int* integer_p)
1077 {
1078 return raptor_object_options_get_option(&parser->options, option,
1079 string_p, integer_p);
1080 }
1081
1082
1083 /**
1084 * raptor_parser_set_strict:
1085 * @rdf_parser: #raptor_parser object
1086 * @is_strict: Non 0 for strict parsing
1087 *
1088 * INTERNAL - Set parser to strict / lax mode.
1089 *
1090 **/
1091 static void
raptor_parser_set_strict(raptor_parser * rdf_parser,int is_strict)1092 raptor_parser_set_strict(raptor_parser* rdf_parser, int is_strict)
1093 {
1094 is_strict = (is_strict) ? 1 : 0;
1095
1096 /* Initialise default parser mode */
1097 RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_SCANNING, 0);
1098
1099 RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_NON_NS_ATTRIBUTES, !is_strict);
1100 RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_OTHER_PARSETYPES, !is_strict);
1101 RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID, !is_strict);
1102 RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_RDF_TYPE_RDF_LIST, 0);
1103 RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_NORMALIZE_LANGUAGE, 1);
1104 RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL, is_strict);
1105 RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_WARN_OTHER_PARSETYPES, !is_strict);
1106 RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_CHECK_RDF_ID, 1);
1107 RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_HTML_TAG_SOUP, !is_strict);
1108 RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_MICROFORMATS, !is_strict);
1109 RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_HTML_LINK, !is_strict);
1110 }
1111
1112
1113 /**
1114 * raptor_parser_get_name:
1115 * @rdf_parser: #raptor_parser parser object
1116 *
1117 * Get the name of a parser.
1118 *
1119 * Use raptor_parser_get_description() to get the alternate names and
1120 * aliases as well as other descriptive values.
1121 *
1122 * Return value: the short name for the parser.
1123 **/
1124 const char*
raptor_parser_get_name(raptor_parser * rdf_parser)1125 raptor_parser_get_name(raptor_parser *rdf_parser)
1126 {
1127 if(rdf_parser->factory->get_name)
1128 return rdf_parser->factory->get_name(rdf_parser);
1129 else
1130 return rdf_parser->factory->desc.names[0];
1131 }
1132
1133
1134 /**
1135 * raptor_parser_get_description:
1136 * @rdf_parser: #raptor_parser parser object
1137 *
1138 * Get description of the syntaxes of the parser.
1139 *
1140 * The returned description is static and lives as long as the raptor
1141 * library (raptor world).
1142 *
1143 * Return value: description of syntax
1144 **/
1145 const raptor_syntax_description*
raptor_parser_get_description(raptor_parser * rdf_parser)1146 raptor_parser_get_description(raptor_parser *rdf_parser)
1147 {
1148 if(rdf_parser->factory->get_description)
1149 return rdf_parser->factory->get_description(rdf_parser);
1150 else
1151 return &rdf_parser->factory->desc;
1152 }
1153
1154
1155
1156 /**
1157 * raptor_parser_parse_abort:
1158 * @rdf_parser: #raptor_parser parser object
1159 *
1160 * Abort an ongoing parsing.
1161 *
1162 * Causes any ongoing generation of statements by a parser to be
1163 * terminated and the parser to return controlto the application
1164 * as soon as draining any existing buffers.
1165 *
1166 * Most useful inside raptor_parser_parse_file() or
1167 * raptor_parser_parse_uri() when the Raptor library is directing the
1168 * parsing and when one of the callback handlers such as as set by
1169 * raptor_parser_set_statement_handler() requires to return to the main
1170 * application code.
1171 **/
1172 void
raptor_parser_parse_abort(raptor_parser * rdf_parser)1173 raptor_parser_parse_abort(raptor_parser *rdf_parser)
1174 {
1175 rdf_parser->failed = 1;
1176 }
1177
1178
1179 /**
1180 * raptor_parser_get_locator:
1181 * @rdf_parser: raptor parser
1182 *
1183 * Get the current raptor locator object.
1184 *
1185 * Return value: raptor locator
1186 **/
1187 raptor_locator*
raptor_parser_get_locator(raptor_parser * rdf_parser)1188 raptor_parser_get_locator(raptor_parser *rdf_parser)
1189 {
1190 if(rdf_parser->factory->get_locator)
1191 return rdf_parser->factory->get_locator(rdf_parser);
1192 else
1193 return &rdf_parser->locator;
1194 }
1195
1196
#ifdef RAPTOR_DEBUG
/*
 * raptor_stats_print:
 * @rdf_parser: parser
 * @stream: FILE stream to write to
 *
 * Debug builds only - print parser statistics.
 *
 * Output is produced only when the RDF/XML parser is compiled in,
 * RAPTOR_DEBUG is greater than 1 and the first name of the parser's
 * factory is "rdfxml"; otherwise the function does nothing.
 */
void
raptor_stats_print(raptor_parser *rdf_parser, FILE *stream)
{
#ifdef RAPTOR_PARSER_RDFXML
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  raptor_rdfxml_parser *xml_context;

  /* statistics exist for the RDF/XML parser only */
  if(strcmp(rdf_parser->factory->desc.names[0], "rdfxml") != 0)
    return;

  xml_context = (raptor_rdfxml_parser*)rdf_parser->context;
  fputs("raptor parser stats\n ", stream);
  raptor_rdfxml_parser_stats_print(xml_context, stream);
#endif
#endif
}
#endif
1212
1213
/* One entry per parser factory considered while guessing a syntax:
 * the recognition score computed for the factory, and the factory itself.
 */
struct syntax_score
{
  int score;                      /* recognition score recorded for @factory */
  raptor_parser_factory* factory; /* parser factory the score belongs to */
};
1219
1220
1221 static int
compare_syntax_score(const void * a,const void * b)1222 compare_syntax_score(const void *a, const void *b) {
1223 return ((struct syntax_score*)b)->score - ((struct syntax_score*)a)->score;
1224 }
1225
1226 #define RAPTOR_MIN_GUESS_SCORE 2
1227
1228 /**
1229 * raptor_world_guess_parser_name:
1230 * @world: world object
1231 * @uri: URI identifying the syntax (or NULL)
1232 * @mime_type: mime type identifying the content (or NULL)
1233 * @buffer: buffer of content to guess (or NULL)
1234 * @len: length of buffer
1235 * @identifier: identifier of content (or NULL)
1236 *
1237 * Guess a parser name for content.
1238 *
1239 * Find a parser by scoring recognition of the syntax by a block of
1240 * characters, the content identifier or a mime type. The content
1241 * identifier is typically a filename or URI or some other identifier.
1242 *
1243 * If the guessing finds only low scores, NULL will be returned.
1244 *
1245 * Return value: a parser name or NULL if no guess could be made
1246 **/
1247 const char*
raptor_world_guess_parser_name(raptor_world * world,raptor_uri * uri,const char * mime_type,const unsigned char * buffer,size_t len,const unsigned char * identifier)1248 raptor_world_guess_parser_name(raptor_world* world,
1249 raptor_uri *uri, const char *mime_type,
1250 const unsigned char *buffer, size_t len,
1251 const unsigned char *identifier)
1252 {
1253 unsigned int i;
1254 raptor_parser_factory *factory;
1255 unsigned char *suffix = NULL;
1256 struct syntax_score* scores;
1257
1258 RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, NULL);
1259
1260 raptor_world_open(world);
1261
1262 scores = RAPTOR_CALLOC(struct syntax_score*,
1263 raptor_sequence_size(world->parsers),
1264 sizeof(struct syntax_score));
1265 if(!scores)
1266 return NULL;
1267
1268 if(identifier) {
1269 unsigned char *p = (unsigned char*)strrchr((const char*)identifier, '.');
1270 if(p) {
1271 unsigned char *from, *to;
1272
1273 p++;
1274 suffix = RAPTOR_MALLOC(unsigned char*, strlen((const char*)p) + 1);
1275 if(!suffix) {
1276 RAPTOR_FREE(syntax_scores, scores);
1277 return NULL;
1278 }
1279
1280 for(from = p, to = suffix; *from; ) {
1281 unsigned char c = *from++;
1282 /* discard the suffix if it wasn't '\.[a-zA-Z0-9]+$' */
1283 if(!isalpha(c) && !isdigit(c)) {
1284 RAPTOR_FREE(char*, suffix);
1285 suffix = NULL;
1286 to = NULL;
1287 break;
1288 }
1289 *to++ = isupper(c) ? (unsigned char)tolower(c): c;
1290 }
1291 if(to)
1292 *to = '\0';
1293 }
1294 }
1295
1296 for(i = 0;
1297 (factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
1298 i++) {
1299 int score = -1;
1300 const raptor_type_q* type_q = NULL;
1301
1302 if(mime_type && factory->desc.mime_types) {
1303 int j;
1304 type_q = NULL;
1305 for(j = 0;
1306 (type_q = &factory->desc.mime_types[j]) && type_q->mime_type;
1307 j++) {
1308 if(!strcmp(mime_type, type_q->mime_type))
1309 break;
1310 }
1311 /* got an exact match mime type - score it via the Q */
1312 if(type_q)
1313 score = type_q->q;
1314 }
1315 /* mime type match has high Q - return factory as result */
1316 if(score >= 10)
1317 break;
1318
1319 if(uri && factory->desc.uri_strings) {
1320 int j;
1321 const char* uri_string = (const char*)raptor_uri_as_string(uri);
1322 const char* factory_uri_string = NULL;
1323
1324 for(j = 0;
1325 (factory_uri_string = factory->desc.uri_strings[j]);
1326 j++) {
1327 if(!strcmp(uri_string, factory_uri_string))
1328 break;
1329 }
1330 if(factory_uri_string)
1331 /* got an exact match syntax for URI - return factory as result */
1332 break;
1333 }
1334
1335 if(factory->recognise_syntax) {
1336 int c = -1;
1337
1338 /* Only use first N bytes to avoid HTML documents that contain
1339 * RDF/XML examples
1340 */
1341 #define FIRSTN 1024
1342 #if FIRSTN > RAPTOR_READ_BUFFER_SIZE
1343 #error "RAPTOR_READ_BUFFER_SIZE is not large enough"
1344 #endif
1345 if(buffer && len && len > FIRSTN) {
1346 c = buffer[FIRSTN];
1347 ((char*)buffer)[FIRSTN] = '\0';
1348 }
1349
1350 score += factory->recognise_syntax(factory, buffer, len,
1351 identifier, suffix,
1352 mime_type);
1353
1354 if(c >= 0)
1355 ((char*)buffer)[FIRSTN] = c;
1356 }
1357
1358 scores[i].score = score < 10 ? score : 10;
1359 scores[i].factory = factory;
1360 #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2
1361 RAPTOR_DEBUG3("Score %15s : %d\n", factory->desc.names[0], score);
1362 #endif
1363 }
1364
1365 if(!factory) {
1366 /* sort the scores and pick a factory if score is good enough */
1367 qsort(scores, i, sizeof(struct syntax_score), compare_syntax_score);
1368
1369 if(scores[0].score >= RAPTOR_MIN_GUESS_SCORE)
1370 factory = scores[0].factory;
1371 }
1372
1373 if(suffix)
1374 RAPTOR_FREE(char*, suffix);
1375
1376 RAPTOR_FREE(syntax_scores, scores);
1377
1378 return factory ? factory->desc.names[0] : NULL;
1379 }
1380
1381
1382 /*
1383 * raptor_parser_copy_flags_state:
1384 * @to_parser: destination parser
1385 * @from_parser: source parser
1386 *
1387 * Copy status flags between parsers - INTERNAL.
1388 **/
1389 void
raptor_parser_copy_flags_state(raptor_parser * to_parser,raptor_parser * from_parser)1390 raptor_parser_copy_flags_state(raptor_parser *to_parser,
1391 raptor_parser *from_parser)
1392 {
1393 to_parser->failed = from_parser->failed;
1394 to_parser->emit_graph_marks = from_parser->emit_graph_marks;
1395 to_parser->emitted_default_graph = from_parser->emitted_default_graph;
1396 }
1397
1398
1399
1400 /*
1401 * raptor_parser_copy_user_state:
1402 * @to_parser: destination parser
1403 * @from_parser: source parser
1404 *
1405 * Copy user state between parsers - INTERNAL.
1406 *
1407 * Return value: non-0 on failure
1408 **/
1409 int
raptor_parser_copy_user_state(raptor_parser * to_parser,raptor_parser * from_parser)1410 raptor_parser_copy_user_state(raptor_parser *to_parser,
1411 raptor_parser *from_parser)
1412 {
1413 int rc = 0;
1414
1415 to_parser->user_data = from_parser->user_data;
1416 to_parser->statement_handler = from_parser->statement_handler;
1417 to_parser->namespace_handler = from_parser->namespace_handler;
1418 to_parser->namespace_handler_user_data = from_parser->namespace_handler_user_data;
1419 to_parser->uri_filter = from_parser->uri_filter;
1420 to_parser->uri_filter_user_data = from_parser->uri_filter_user_data;
1421
1422 /* copy bit flags */
1423 raptor_parser_copy_flags_state(to_parser, from_parser);
1424
1425 /* copy options */
1426 if(!rc)
1427 rc = raptor_object_options_copy_state(&to_parser->options,
1428 &from_parser->options);
1429
1430 return rc;
1431 }
1432
1433
1434 /*
1435 * raptor_parser_start_namespace:
1436 * @rdf_parser: parser
1437 * @nspace: namespace starting
1438 *
1439 * Internal - Invoke start namespace handler
1440 **/
1441 void
raptor_parser_start_namespace(raptor_parser * rdf_parser,raptor_namespace * nspace)1442 raptor_parser_start_namespace(raptor_parser* rdf_parser,
1443 raptor_namespace* nspace)
1444 {
1445 if(!rdf_parser->namespace_handler)
1446 return;
1447
1448 (*rdf_parser->namespace_handler)(rdf_parser->namespace_handler_user_data,
1449 nspace);
1450 }
1451
1452
1453 /**
1454 * raptor_parser_get_accept_header:
1455 * @rdf_parser: parser
1456 *
1457 * Get an HTTP Accept value for the parser.
1458 *
1459 * The returned string must be freed by the caller such as with
1460 * raptor_free_memory().
1461 *
1462 * Return value: a new Accept: header string or NULL on failure
1463 **/
1464 const char*
raptor_parser_get_accept_header(raptor_parser * rdf_parser)1465 raptor_parser_get_accept_header(raptor_parser* rdf_parser)
1466 {
1467 raptor_parser_factory *factory = rdf_parser->factory;
1468 char *accept_header = NULL;
1469 size_t len;
1470 char *p;
1471 int i;
1472 const raptor_type_q* type_q;
1473
1474 if(factory->accept_header)
1475 return factory->accept_header(rdf_parser);
1476
1477 if(!factory->desc.mime_types)
1478 return NULL;
1479
1480 len = 0;
1481 for(i = 0;
1482 (type_q = &factory->desc.mime_types[i]) && type_q->mime_type;
1483 i++) {
1484 len += type_q->mime_type_len + 2; /* ", " */
1485 if(type_q->q < 10)
1486 len += 6; /* ";q=X.Y" */
1487 }
1488
1489 /* 9 = strlen("\*\/\*;q=0.1") */
1490 #define ACCEPT_HEADER_LEN 9
1491 accept_header = RAPTOR_MALLOC(char*, len + ACCEPT_HEADER_LEN + 1);
1492 if(!accept_header)
1493 return NULL;
1494
1495 p = accept_header;
1496 for(i = 0;
1497 (type_q = &factory->desc.mime_types[i]) && type_q->mime_type;
1498 i++) {
1499 memcpy(p, type_q->mime_type, type_q->mime_type_len);
1500 p += type_q->mime_type_len;
1501 if(type_q->q < 10) {
1502 *p++ = ';';
1503 *p++ = 'q';
1504 *p++ = '=';
1505 *p++ = '0';
1506 *p++ = '.';
1507 *p++ = '0' + (type_q->q);
1508 }
1509
1510 *p++ = ',';
1511 *p++ = ' ';
1512 }
1513
1514 memcpy(p, "*/*;q=0.1", ACCEPT_HEADER_LEN + 1);
1515
1516 return accept_header;
1517 }
1518
1519
1520 const char*
raptor_parser_get_accept_header_all(raptor_world * world)1521 raptor_parser_get_accept_header_all(raptor_world* world)
1522 {
1523 raptor_parser_factory *factory;
1524 char *accept_header = NULL;
1525 size_t len;
1526 char *p;
1527 int i;
1528
1529 len = 0;
1530 for(i = 0;
1531 (factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
1532 i++) {
1533 const raptor_type_q* type_q;
1534 int j;
1535
1536 for(j = 0;
1537 (type_q = &factory->desc.mime_types[j]) && type_q->mime_type;
1538 j++) {
1539 len += type_q->mime_type_len + 2; /* ", " */
1540 if(type_q->q < 10)
1541 len += 6; /* ";q=X.Y" */
1542 }
1543 }
1544
1545 /* 9 = strlen("\*\/\*;q=0.1") */
1546 #define ACCEPT_HEADER_LEN 9
1547 accept_header = RAPTOR_MALLOC(char*, len + ACCEPT_HEADER_LEN + 1);
1548 if(!accept_header)
1549 return NULL;
1550
1551 p = accept_header;
1552 for(i = 0;
1553 (factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
1554 i++) {
1555 const raptor_type_q* type_q;
1556 int j;
1557
1558 for(j = 0;
1559 (type_q = &factory->desc.mime_types[j]) && type_q->mime_type;
1560 j++) {
1561 memcpy(p, type_q->mime_type, type_q->mime_type_len);
1562 p+= type_q->mime_type_len;
1563 if(type_q->q < 10) {
1564 *p++ = ';';
1565 *p++ = 'q';
1566 *p++ = '=';
1567 *p++ = '0';
1568 *p++ = '.';
1569 *p++ = '0' + (type_q->q);
1570 }
1571
1572 *p++ = ',';
1573 *p++ = ' ';
1574 }
1575
1576 }
1577
1578 memcpy(p, "*/*;q=0.1", ACCEPT_HEADER_LEN + 1);
1579
1580 return accept_header;
1581 }
1582
1583
1584 void
raptor_parser_save_content(raptor_parser * rdf_parser,int save)1585 raptor_parser_save_content(raptor_parser* rdf_parser, int save)
1586 {
1587 if(rdf_parser->sb)
1588 raptor_free_stringbuffer(rdf_parser->sb);
1589
1590 rdf_parser->sb= save ? raptor_new_stringbuffer() : NULL;
1591 }
1592
1593
1594 const unsigned char*
raptor_parser_get_content(raptor_parser * rdf_parser,size_t * length_p)1595 raptor_parser_get_content(raptor_parser* rdf_parser, size_t* length_p)
1596 {
1597 unsigned char* buffer;
1598 size_t len;
1599
1600 if(!rdf_parser->sb)
1601 return NULL;
1602
1603 len = raptor_stringbuffer_length(rdf_parser->sb);
1604 buffer = RAPTOR_MALLOC(unsigned char*, len + 1);
1605 if(!buffer)
1606 return NULL;
1607
1608 raptor_stringbuffer_copy_to_string(rdf_parser->sb, buffer, len);
1609
1610 if(length_p)
1611 *length_p=len;
1612
1613 return buffer;
1614 }
1615
1616
1617 void
raptor_parser_start_graph(raptor_parser * parser,raptor_uri * uri,int is_declared)1618 raptor_parser_start_graph(raptor_parser* parser, raptor_uri* uri,
1619 int is_declared)
1620 {
1621 int flags = RAPTOR_GRAPH_MARK_START;
1622 if(is_declared)
1623 flags |= RAPTOR_GRAPH_MARK_DECLARED;
1624
1625 if(!parser->emit_graph_marks)
1626 return;
1627
1628 if(parser->graph_mark_handler)
1629 (*parser->graph_mark_handler)(parser->user_data, uri, flags);
1630 }
1631
1632
1633 void
raptor_parser_end_graph(raptor_parser * parser,raptor_uri * uri,int is_declared)1634 raptor_parser_end_graph(raptor_parser* parser, raptor_uri* uri, int is_declared)
1635 {
1636 int flags = 0;
1637 if(is_declared)
1638 flags |= RAPTOR_GRAPH_MARK_DECLARED;
1639
1640 if(!parser->emit_graph_marks)
1641 return;
1642
1643 if(parser->graph_mark_handler)
1644 (*parser->graph_mark_handler)(parser->user_data, uri, flags);
1645 }
1646
1647
1648 /**
1649 * raptor_parser_get_world:
1650 * @rdf_parser: parser
1651 *
1652 * Get the #raptor_world object associated with a parser.
1653 *
1654 * Return value: raptor_world* pointer
1655 **/
1656 raptor_world *
raptor_parser_get_world(raptor_parser * rdf_parser)1657 raptor_parser_get_world(raptor_parser* rdf_parser)
1658 {
1659 return rdf_parser->world;
1660 }
1661
1662
1663 /**
1664 * raptor_parser_get_graph:
1665 * @rdf_parser: parser
1666 *
1667 * Get the current graph for the parser
1668 *
1669 * The returned URI is owned by the caller and must be freed with
1670 * raptor_free_uri()
1671 *
1672 * Return value: raptor_uri* graph name or NULL for the default graph
1673 **/
1674 raptor_uri*
raptor_parser_get_graph(raptor_parser * rdf_parser)1675 raptor_parser_get_graph(raptor_parser* rdf_parser)
1676 {
1677 if(rdf_parser->factory->get_graph)
1678 return rdf_parser->factory->get_graph(rdf_parser);
1679 return NULL;
1680 }
1681
1682
1683 /**
1684 * raptor_parser_parse_iostream:
1685 * @rdf_parser: parser
1686 * @iostr: iostream to read from
1687 * @base_uri: the base URI to use (or NULL)
1688 *
1689 * Parse content from an iostream
1690 *
1691 * If the parser requires a base URI and @base_uri is NULL, an error
1692 * will be generated and the function will fail.
1693 *
1694 * Return value: non 0 on failure, <0 if a required base URI was missing
1695 **/
1696 int
raptor_parser_parse_iostream(raptor_parser * rdf_parser,raptor_iostream * iostr,raptor_uri * base_uri)1697 raptor_parser_parse_iostream(raptor_parser* rdf_parser, raptor_iostream *iostr,
1698 raptor_uri *base_uri)
1699 {
1700 int rc = 0;
1701
1702 RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(rdf_parser, raptor_parser, 1);
1703 RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(iostr, raptor_iostr, 1);
1704
1705 rc = raptor_parser_parse_start(rdf_parser, base_uri);
1706 if(rc)
1707 return rc;
1708
1709 while(!raptor_iostream_read_eof(iostr)) {
1710 int ilen;
1711 size_t len;
1712 int is_end;
1713
1714 ilen = raptor_iostream_read_bytes(rdf_parser->buffer, 1,
1715 RAPTOR_READ_BUFFER_SIZE, iostr);
1716 if(ilen < 0)
1717 break;
1718 len = RAPTOR_GOOD_CAST(size_t, ilen);
1719 is_end = (len < RAPTOR_READ_BUFFER_SIZE);
1720
1721 rc = raptor_parser_parse_chunk(rdf_parser, rdf_parser->buffer, len, is_end);
1722 if(rc || is_end)
1723 break;
1724 }
1725
1726 return rc;
1727 }
1728
1729
1730 /* end not STANDALONE */
1731 #endif
1732
1733
1734 #ifdef STANDALONE
1735 #include <stdio.h>
1736
1737 int main(int argc, char *argv[]);
1738
1739
1740 int
main(int argc,char * argv[])1741 main(int argc, char *argv[])
1742 {
1743 raptor_world *world;
1744 const char *program = raptor_basename(argv[0]);
1745 int i;
1746 const char *s;
1747
1748 world = raptor_new_world();
1749 if(!world || raptor_world_open(world))
1750 exit(1);
1751
1752 #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
1753 fprintf(stderr, "%s: Known options:\n", program);
1754 #endif
1755
1756 for(i = 0; i <= (int)raptor_option_get_count(); i++) {
1757 raptor_option_description *od;
1758 int fn;
1759
1760 od = raptor_world_get_option_description(world,
1761 RAPTOR_DOMAIN_PARSER,
1762 (raptor_option)i);
1763 if(!od)
1764 continue;
1765
1766 #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
1767 fprintf(stderr, " %2d %-20s %s <%s>\n", i, od->name, od->label,
1768 (od->uri ? (const char*)raptor_uri_as_string(od->uri) : ""));
1769 #endif
1770 fn = raptor_world_get_option_from_uri(world, od->uri);
1771 if(fn != i) {
1772 fprintf(stderr,
1773 "%s: raptor_option_from_uri() returned %d expected %d\n",
1774 program, fn, i);
1775 return 1;
1776 }
1777 raptor_free_option_description(od);
1778 }
1779
1780 s = raptor_parser_get_accept_header_all(world);
1781 #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
1782 fprintf(stderr, "Default HTTP accept header: '%s'\n", s);
1783 #endif
1784 if(!s) {
1785 fprintf(stderr, "%s: raptor_parser_get_accept_header_all() failed\n",
1786 program);
1787 return 1;
1788 }
1789 RAPTOR_FREE(char*, s);
1790
1791 raptor_free_world(world);
1792
1793 return 0;
1794 }
1795
1796 #endif
1797