1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * raptor_parse.c - Raptor Parser API
4  *
5  * Copyright (C) 2000-2010, David Beckett http://www.dajobe.org/
6  * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
7  *
8  * This package is Free Software and part of Redland http://librdf.org/
9  *
10  * It is licensed under the following three licenses as alternatives:
11  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
12  *   2. GNU General Public License (GPL) V2 or any newer version
13  *   3. Apache License, V2.0 or any newer version
14  *
15  * You may not use this file except in compliance with at least one of
16  * the above three licenses.
17  *
18  * See LICENSE.html or LICENSE.txt at the top of this package for the
19  * complete terms and further detail along with the license texts for
20  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
21  *
22  *
23  */
24 
25 
26 #ifdef HAVE_CONFIG_H
27 #include <raptor_config.h>
28 #endif
29 
30 #include <stdio.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include <stdarg.h>
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 #ifdef HAVE_STDLIB_H
38 #include <stdlib.h>
39 #endif
40 #ifdef HAVE_SYS_STAT_H
41 #include <sys/stat.h>
42 #endif
43 #ifdef HAVE_FCNTL_H
44 #include <fcntl.h>
45 #endif
46 
47 /* Raptor includes */
48 #include "raptor2.h"
49 #include "raptor_internal.h"
50 
51 
52 #ifndef STANDALONE
53 
54 /* prototypes for helper functions */
55 static void raptor_parser_set_strict(raptor_parser* rdf_parser, int is_strict);
56 
57 /* helper methods */
58 
59 static void
raptor_free_parser_factory(raptor_parser_factory * factory)60 raptor_free_parser_factory(raptor_parser_factory* factory)
61 {
62   RAPTOR_ASSERT_OBJECT_POINTER_RETURN(factory, raptor_parser_factory);
63 
64   if(factory->finish_factory)
65     factory->finish_factory(factory);
66 
67   RAPTOR_FREE(raptor_parser_factory, factory);
68 }
69 
70 
71 /* class methods */
72 
73 int
raptor_parsers_init(raptor_world * world)74 raptor_parsers_init(raptor_world *world)
75 {
76   int rc = 0;
77 
78   world->parsers = raptor_new_sequence((raptor_data_free_handler)raptor_free_parser_factory, NULL);
79   if(!world->parsers)
80     return 1;
81 
82 #ifdef RAPTOR_PARSER_RDFXML
83   rc+= raptor_init_parser_rdfxml(world) != 0;
84 #endif
85 
86 #ifdef RAPTOR_PARSER_NTRIPLES
87   rc+= raptor_init_parser_ntriples(world) != 0;
88 #endif
89 
90 #ifdef RAPTOR_PARSER_N3
91   rc+= raptor_init_parser_n3(world) != 0;
92 #endif
93 
94 #ifdef RAPTOR_PARSER_TURTLE
95   rc+= raptor_init_parser_turtle(world) != 0;
96 #endif
97 
98 #ifdef RAPTOR_PARSER_TRIG
99   rc+= raptor_init_parser_trig(world) != 0;
100 #endif
101 
102 #ifdef RAPTOR_PARSER_RSS
103   rc+= raptor_init_parser_rss(world) != 0;
104 #endif
105 
106 #if defined(RAPTOR_PARSER_GRDDL)
107   rc+= raptor_init_parser_grddl_common(world) != 0;
108 
109 #ifdef RAPTOR_PARSER_GRDDL
110   rc+= raptor_init_parser_grddl(world) != 0;
111 #endif
112 
113 #endif
114 
115 #ifdef RAPTOR_PARSER_GUESS
116   rc+= raptor_init_parser_guess(world) != 0;
117 #endif
118 
119 #ifdef RAPTOR_PARSER_RDFA
120   rc+= raptor_init_parser_rdfa(world) != 0;
121 #endif
122 
123 #ifdef RAPTOR_PARSER_JSON
124   rc+= raptor_init_parser_json(world) != 0;
125 #endif
126 
127 #ifdef RAPTOR_PARSER_NQUADS
128   rc+= raptor_init_parser_nquads(world) != 0;
129 #endif
130 
131   return rc;
132 }
133 
134 
135 /*
136  * raptor_finish_parsers - delete all the registered parsers
137  */
138 void
raptor_parsers_finish(raptor_world * world)139 raptor_parsers_finish(raptor_world *world)
140 {
141   if(world->parsers) {
142     raptor_free_sequence(world->parsers);
143     world->parsers = NULL;
144   }
145 #if defined(RAPTOR_PARSER_GRDDL)
146   raptor_terminate_parser_grddl_common(world);
147 #endif
148 }
149 
150 
151 /*
152  * raptor_world_register_parser_factory:
153  * @world: raptor world
154  * @factory: pointer to function to call to register the factory
155  *
156  * Internal - Register a parser via parser factory.
157  *
158  * All strings set in the @factory method are shared with the
159  * #raptor_parser_factory
160  *
161  * Return value: new factory object or NULL on failure
162  **/
163 RAPTOR_EXTERN_C
164 raptor_parser_factory*
raptor_world_register_parser_factory(raptor_world * world,int (* factory)(raptor_parser_factory *))165 raptor_world_register_parser_factory(raptor_world* world,
166                                      int (*factory) (raptor_parser_factory*))
167 {
168   raptor_parser_factory *parser = NULL;
169 
170   parser = RAPTOR_CALLOC(raptor_parser_factory*, 1, sizeof(*parser));
171   if(!parser)
172     return NULL;
173 
174   parser->world = world;
175 
176   parser->desc.mime_types = NULL;
177 
178   if(raptor_sequence_push(world->parsers, parser))
179     return NULL; /* on error, parser is already freed by the sequence */
180 
181   /* Call the parser registration function on the new object */
182   if(factory(parser))
183     return NULL; /* parser is owned and freed by the parsers sequence */
184 
185   if(raptor_syntax_description_validate(&parser->desc)) {
186     raptor_log_error(world, RAPTOR_LOG_LEVEL_ERROR, NULL,
187                      "Parser description failed to validate\n");
188     goto tidy;
189   }
190 
191 
192 
193 #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
194   RAPTOR_DEBUG2("Registered parser %s\n", parser->desc.names[0]);
195 #endif
196 
197   return parser;
198 
199   /* Clean up on failure */
200   tidy:
201   raptor_free_parser_factory(parser);
202   return NULL;
203 }
204 
205 
206 /*
207  * raptor_world_get_parser_factory:
208  * @world: world object
209  * @name: the factory name or NULL for the default factory
210  *
211  * INTERNAL - Get a parser factory by name.
212  *
213  * Return value: the factory object or NULL if there is no such factory
214  **/
215 raptor_parser_factory*
raptor_world_get_parser_factory(raptor_world * world,const char * name)216 raptor_world_get_parser_factory(raptor_world *world, const char *name)
217 {
218   raptor_parser_factory *factory = NULL;
219 
220   /* return 1st parser if no particular one wanted - why? */
221   if(!name) {
222     factory = (raptor_parser_factory *)raptor_sequence_get_at(world->parsers, 0);
223     if(!factory) {
224       RAPTOR_DEBUG1("No (default) parsers registered\n");
225       return NULL;
226     }
227   } else {
228     int i;
229 
230     for(i = 0;
231         (factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
232         i++) {
233       int namei;
234       const char* fname;
235 
236       for(namei = 0; (fname = factory->desc.names[namei]); namei++) {
237         if(!strcmp(fname, name))
238           break;
239       }
240       if(fname)
241         break;
242     }
243   }
244 
245   return factory;
246 }
247 
248 
249 /**
250  * raptor_world_get_parsers_count:
251  * @world: world object
252  *
253  * Get number of parsers
254  *
255  * Return value: number of parsers
256  **/
257 int
raptor_world_get_parsers_count(raptor_world * world)258 raptor_world_get_parsers_count(raptor_world* world)
259 {
260   RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, NULL);
261 
262   raptor_world_open(world);
263 
264   return raptor_sequence_size(world->parsers);
265 }
266 
267 
268 /**
269  * raptor_world_get_parser_description:
270  * @world: world object
271  * @counter: index into the list of parsers
272  *
273  * Get parser descriptive syntax information
274  *
275  * Return value: description or NULL if counter is out of range
276  **/
277 const raptor_syntax_description*
raptor_world_get_parser_description(raptor_world * world,unsigned int counter)278 raptor_world_get_parser_description(raptor_world* world,
279                                     unsigned int counter)
280 {
281   raptor_parser_factory *factory;
282 
283   RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, NULL);
284 
285   raptor_world_open(world);
286 
287   factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers,
288                                                            counter);
289 
290   if(!factory)
291     return NULL;
292 
293   return &factory->desc;
294 }
295 
296 
297 /**
298  * raptor_world_is_parser_name:
299  * @world: world object
300  * @name: the syntax name
301  *
302  * Check the name of a parser is known.
303  *
304  * Return value: non 0 if name is a known syntax name
305  */
306 int
raptor_world_is_parser_name(raptor_world * world,const char * name)307 raptor_world_is_parser_name(raptor_world* world, const char *name)
308 {
309   if(!name)
310     return 0;
311 
312   RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, 0);
313 
314   raptor_world_open(world);
315 
316   return (raptor_world_get_parser_factory(world, name) != NULL);
317 }
318 
319 
320 /**
321  * raptor_new_parser:
322  * @world: world object
323  * @name: the parser name or NULL for default parser
324  *
325  * Constructor - create a new raptor_parser object.
326  *
327  * Return value: a new #raptor_parser object or NULL on failure
328  */
329 raptor_parser*
raptor_new_parser(raptor_world * world,const char * name)330 raptor_new_parser(raptor_world* world, const char *name)
331 {
332   raptor_parser_factory* factory;
333   raptor_parser* rdf_parser;
334 
335   RAPTOR_CHECK_CONSTRUCTOR_WORLD(world);
336 
337   raptor_world_open(world);
338 
339   factory = raptor_world_get_parser_factory(world, name);
340   if(!factory)
341     return NULL;
342 
343   rdf_parser = RAPTOR_CALLOC(raptor_parser*, 1, sizeof(*rdf_parser));
344   if(!rdf_parser)
345     return NULL;
346 
347   rdf_parser->world = world;
348   raptor_statement_init(&rdf_parser->statement, world);
349 
350   rdf_parser->context = RAPTOR_CALLOC(void*, 1, factory->context_length);
351   if(!rdf_parser->context) {
352     raptor_free_parser(rdf_parser);
353     return NULL;
354   }
355 
356 #ifdef RAPTOR_XML_LIBXML
357   rdf_parser->magic = RAPTOR_LIBXML_MAGIC;
358 #endif
359   rdf_parser->factory = factory;
360 
361   /* Bit flags */
362   rdf_parser->failed = 0;
363   rdf_parser->emit_graph_marks = 1;
364   rdf_parser->emitted_default_graph = 0;
365 
366   raptor_object_options_init(&rdf_parser->options, RAPTOR_OPTION_AREA_PARSER);
367 
368   /* set parsing strictness from default value */
369   raptor_parser_set_strict(rdf_parser,
370                            RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_STRICT));
371 
372   if(factory->init(rdf_parser, name)) {
373     raptor_free_parser(rdf_parser);
374     return NULL;
375   }
376 
377   return rdf_parser;
378 }
379 
380 
381 /**
382  * raptor_new_parser_for_content:
383  * @world: world object
384  * @uri: URI identifying the syntax (or NULL)
385  * @mime_type: mime type identifying the content (or NULL)
386  * @buffer: buffer of content to guess (or NULL)
387  * @len: length of buffer
388  * @identifier: identifier of content (or NULL)
389  *
390  * Constructor - create a new raptor_parser.
391  *
392  * Uses raptor_world_guess_parser_name() to find a parser by scoring
393  * recognition of the syntax by a block of characters, the content
394  * identifier or a mime type.  The content identifier is typically a
395  * filename or URI or some other identifier.
396  *
397  * Return value: a new #raptor_parser object or NULL on failure
398  **/
399 raptor_parser*
raptor_new_parser_for_content(raptor_world * world,raptor_uri * uri,const char * mime_type,const unsigned char * buffer,size_t len,const unsigned char * identifier)400 raptor_new_parser_for_content(raptor_world* world,
401                               raptor_uri *uri, const char *mime_type,
402                               const unsigned char *buffer, size_t len,
403                               const unsigned char *identifier)
404 {
405   const char* name;
406 
407   RAPTOR_CHECK_CONSTRUCTOR_WORLD(world);
408 
409   raptor_world_open(world);
410 
411   name = raptor_world_guess_parser_name(world, uri, mime_type,
412                                         buffer, len, identifier);
413   return name ? raptor_new_parser(world, name) : NULL;
414 }
415 
416 
417 /**
418  * raptor_parser_parse_start:
419  * @rdf_parser: RDF parser
420  * @uri: base URI or may be NULL if no base URI is required
421  *
422  * Start a parse of content with base URI.
423  *
424  * Parsers that need a base URI can be identified using a syntax
425  * description returned by raptor_world_get_parser_description()
426  * statically or raptor_parser_get_description() on a constructed
427  * parser.
428  *
429  * Return value: non-0 on failure, <0 if a required base URI was missing
430  **/
431 int
raptor_parser_parse_start(raptor_parser * rdf_parser,raptor_uri * uri)432 raptor_parser_parse_start(raptor_parser *rdf_parser, raptor_uri *uri)
433 {
434   if((rdf_parser->factory->desc.flags & RAPTOR_SYNTAX_NEED_BASE_URI) && !uri) {
435     raptor_parser_error(rdf_parser, "Missing base URI for %s parser.",
436                         rdf_parser->factory->desc.names[0]);
437     return -1;
438   }
439 
440   if(uri)
441     uri = raptor_uri_copy(uri);
442 
443   if(rdf_parser->base_uri)
444     raptor_free_uri(rdf_parser->base_uri);
445   rdf_parser->base_uri = uri;
446 
447   rdf_parser->locator.uri    = uri;
448   rdf_parser->locator.line   = -1;
449   rdf_parser->locator.column = -1;
450   rdf_parser->locator.byte   = -1;
451 
452   if(rdf_parser->factory->start)
453     return rdf_parser->factory->start(rdf_parser);
454   else
455     return 0;
456 }
457 
458 
459 
460 
461 /**
462  * raptor_parser_parse_chunk:
463  * @rdf_parser: RDF parser
464  * @buffer: content to parse
465  * @len: length of buffer
466  * @is_end: non-0 if this is the end of the content (such as EOF)
467  *
468  * Parse a block of content into triples.
469  *
470  * This method can only be called after raptor_parser_parse_start() has
471  * initialised the parser.
472  *
473  * Return value: non-0 on failure.
474  **/
475 int
raptor_parser_parse_chunk(raptor_parser * rdf_parser,const unsigned char * buffer,size_t len,int is_end)476 raptor_parser_parse_chunk(raptor_parser* rdf_parser,
477                           const unsigned char *buffer, size_t len, int is_end)
478 {
479   if(rdf_parser->sb)
480     raptor_stringbuffer_append_counted_string(rdf_parser->sb, buffer, len, 1);
481 
482   return rdf_parser->factory->chunk(rdf_parser, buffer, len, is_end);
483 }
484 
485 
486 /**
487  * raptor_free_parser:
488  * @parser: #raptor_parser object
489  *
490  * Destructor - destroy a raptor_parser object.
491  *
492  **/
493 void
raptor_free_parser(raptor_parser * rdf_parser)494 raptor_free_parser(raptor_parser* rdf_parser)
495 {
496   if(!rdf_parser)
497     return;
498 
499   if(rdf_parser->factory)
500     rdf_parser->factory->terminate(rdf_parser);
501 
502   if(rdf_parser->www)
503     raptor_free_www(rdf_parser->www);
504 
505   if(rdf_parser->context)
506     RAPTOR_FREE(raptor_parser_context, rdf_parser->context);
507 
508   if(rdf_parser->base_uri)
509     raptor_free_uri(rdf_parser->base_uri);
510 
511   if(rdf_parser->sb)
512     raptor_free_stringbuffer(rdf_parser->sb);
513 
514   raptor_object_options_clear(&rdf_parser->options);
515 
516   RAPTOR_FREE(raptor_parser, rdf_parser);
517 }
518 
519 
520 /**
521  * raptor_parser_parse_file_stream:
522  * @rdf_parser: parser
523  * @stream: FILE* of RDF content
524  * @filename: filename of content or NULL if it has no name
525  * @base_uri: the base URI to use
526  *
527  * Parse RDF content from a FILE*.
528  *
529  * After draining the FILE* stream (EOF), fclose is not called on it.
530  *
531  * Return value: non 0 on failure
532  **/
533 int
raptor_parser_parse_file_stream(raptor_parser * rdf_parser,FILE * stream,const char * filename,raptor_uri * base_uri)534 raptor_parser_parse_file_stream(raptor_parser* rdf_parser,
535                                 FILE *stream, const char* filename,
536                                 raptor_uri *base_uri)
537 {
538   int rc = 0;
539   raptor_locator *locator = &rdf_parser->locator;
540 
541   if(!stream || !base_uri)
542     return 1;
543 
544   locator->line= locator->column = -1;
545   locator->file= filename;
546 
547   if(raptor_parser_parse_start(rdf_parser, base_uri))
548     return 1;
549 
550   while(!feof(stream)) {
551     size_t len = fread(rdf_parser->buffer, 1, RAPTOR_READ_BUFFER_SIZE, stream);
552     int is_end = (len < RAPTOR_READ_BUFFER_SIZE);
553     rdf_parser->buffer[len] = '\0';
554     rc = raptor_parser_parse_chunk(rdf_parser, rdf_parser->buffer, len, is_end);
555     if(rc || is_end)
556       break;
557   }
558 
559   return (rc != 0);
560 }
561 
562 
563 /**
564  * raptor_parser_parse_file:
565  * @rdf_parser: parser
566  * @uri: URI of RDF content or NULL to read from standard input
567  * @base_uri: the base URI to use (or NULL if the same)
568  *
569  * Parse RDF content at a file URI.
570  *
571  * If @uri is NULL (source is stdin), then the @base_uri is required.
572  *
573  * Return value: non 0 on failure
574  **/
575 int
raptor_parser_parse_file(raptor_parser * rdf_parser,raptor_uri * uri,raptor_uri * base_uri)576 raptor_parser_parse_file(raptor_parser* rdf_parser, raptor_uri *uri,
577                          raptor_uri *base_uri)
578 {
579   int rc = 0;
580   int free_base_uri = 0;
581   const char *filename = NULL;
582   FILE *fh = NULL;
583 #if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H)
584   struct stat buf;
585 #endif
586 
587   if(uri) {
588     filename = raptor_uri_uri_string_to_filename(raptor_uri_as_string(uri));
589     if(!filename)
590       return 1;
591 
592 #if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H)
593     if(!stat(filename, &buf) && S_ISDIR(buf.st_mode)) {
594       raptor_parser_error(rdf_parser, "Cannot read from a directory '%s'",
595                           filename);
596       goto cleanup;
597     }
598 #endif
599 
600     fh = fopen(filename, "r");
601     if(!fh) {
602       raptor_parser_error(rdf_parser, "file '%s' open failed - %s",
603                           filename, strerror(errno));
604       goto cleanup;
605     }
606     if(!base_uri) {
607       base_uri = raptor_uri_copy(uri);
608       free_base_uri = 1;
609     }
610   } else {
611     if(!base_uri)
612       return 1;
613     fh = stdin;
614   }
615 
616   rc = raptor_parser_parse_file_stream(rdf_parser, fh, filename, base_uri);
617 
618   cleanup:
619   if(uri) {
620     if(fh)
621       fclose(fh);
622     RAPTOR_FREE(char*, filename);
623   }
624   if(free_base_uri)
625     raptor_free_uri(base_uri);
626 
627   return rc;
628 }
629 
630 
631 void
raptor_parser_parse_uri_write_bytes(raptor_www * www,void * userdata,const void * ptr,size_t size,size_t nmemb)632 raptor_parser_parse_uri_write_bytes(raptor_www* www,
633                                     void *userdata, const void *ptr,
634                                     size_t size, size_t nmemb)
635 {
636   raptor_parse_bytes_context* rpbc = (raptor_parse_bytes_context*)userdata;
637   size_t len = size * nmemb;
638 
639   if(!rpbc->started) {
640     raptor_uri* base_uri = rpbc->base_uri;
641 
642     if(!base_uri) {
643       rpbc->final_uri = raptor_www_get_final_uri(www);
644       /* base URI after URI resolution is finally chosen */
645       base_uri = rpbc->final_uri ? rpbc->final_uri : www->uri;
646     }
647 
648     if(raptor_parser_parse_start(rpbc->rdf_parser, base_uri))
649       raptor_www_abort(www, "Parsing failed");
650     rpbc->started = 1;
651   }
652 
653   if(raptor_parser_parse_chunk(rpbc->rdf_parser, (unsigned char*)ptr, len, 0))
654     raptor_www_abort(www, "Parsing failed");
655 }
656 
657 
658 static void
raptor_parser_parse_uri_content_type_handler(raptor_www * www,void * userdata,const char * content_type)659 raptor_parser_parse_uri_content_type_handler(raptor_www* www, void* userdata,
660                                              const char* content_type)
661 {
662   raptor_parser* rdf_parser = (raptor_parser*)userdata;
663   if(rdf_parser->factory->content_type_handler)
664     rdf_parser->factory->content_type_handler(rdf_parser, content_type);
665 }
666 
667 
668 int
raptor_parser_set_uri_filter_no_net(void * user_data,raptor_uri * uri)669 raptor_parser_set_uri_filter_no_net(void *user_data, raptor_uri* uri)
670 {
671   unsigned char* uri_string = raptor_uri_as_string(uri);
672 
673   if(raptor_uri_uri_string_is_file_uri(uri_string))
674     return 0;
675 
676   raptor_parser_error((raptor_parser*)user_data,
677                       "Network fetch of URI '%s' denied", uri_string);
678   return 1;
679 }
680 
681 
682 /**
683  * raptor_parser_parse_uri:
684  * @rdf_parser: parser
685  * @uri: URI of RDF content
686  * @base_uri: the base URI to use (or NULL if the same)
687  *
688  * Parse the RDF content at URI.
689  *
690  * Sends an HTTP Accept: header whent the URI is of the HTTP protocol,
691  * see raptor_parser_parse_uri_with_connection() for details including
692  * how the @base_uri is used.
693  *
694  * Return value: non 0 on failure
695  **/
696 int
raptor_parser_parse_uri(raptor_parser * rdf_parser,raptor_uri * uri,raptor_uri * base_uri)697 raptor_parser_parse_uri(raptor_parser* rdf_parser, raptor_uri *uri,
698                         raptor_uri *base_uri)
699 {
700   return raptor_parser_parse_uri_with_connection(rdf_parser, uri, base_uri,
701                                                  NULL);
702 }
703 
704 
705 /**
706  * raptor_parser_parse_uri_with_connection:
707  * @rdf_parser: parser
708  * @uri: URI of RDF content
709  * @base_uri: the base URI to use (or NULL if the same)
710  * @connection: connection object pointer or NULL to create a new one
711  *
712  * Parse RDF content at URI using existing WWW connection.
713  *
714  * If @base_uri is not given and during resolution of the URI, a
715  * protocol redirection occurs, the final resolved URI will be
716  * used as the base URI.  If redirection does not occur, the
717  * base URI will be @uri.
718  *
719  * If @base_uri is given, it overrides the process above.
720  *
721  * When @connection is NULL and a MIME Type exists for the parser
722  * type, this type is sent in an HTTP Accept: header in the form
723  * Accept: MIME-TYPE along with a wildcard of 0.1 quality, so MIME-TYPE is
724  * prefered rather than the sole answer.  The latter part may not be
725  * necessary but should ensure an HTTP 200 response.
726  *
727  * Return value: non 0 on failure
728  **/
729 int
raptor_parser_parse_uri_with_connection(raptor_parser * rdf_parser,raptor_uri * uri,raptor_uri * base_uri,void * connection)730 raptor_parser_parse_uri_with_connection(raptor_parser* rdf_parser,
731                                         raptor_uri *uri,
732                                         raptor_uri *base_uri, void *connection)
733 {
734   int ret = 0;
735   raptor_parse_bytes_context rpbc;
736   char* ua = NULL;
737   char* cert_filename = NULL;
738   char* cert_type = NULL;
739   char* cert_passphrase = NULL;
740   int ssl_verify_peer;
741   int ssl_verify_host;
742 
743   if(connection) {
744     if(rdf_parser->www)
745       raptor_free_www(rdf_parser->www);
746     rdf_parser->www = raptor_new_www_with_connection(rdf_parser->world,
747                                                      connection);
748     if(!rdf_parser->www)
749       return 1;
750   } else {
751     const char *accept_h;
752 
753     if(rdf_parser->www)
754       raptor_free_www(rdf_parser->www);
755     rdf_parser->www = raptor_new_www(rdf_parser->world);
756     if(!rdf_parser->www)
757       return 1;
758 
759     accept_h = raptor_parser_get_accept_header(rdf_parser);
760     if(accept_h) {
761       raptor_www_set_http_accept(rdf_parser->www, accept_h);
762       RAPTOR_FREE(char*, accept_h);
763     }
764   }
765 
766   rpbc.rdf_parser = rdf_parser;
767   rpbc.base_uri = base_uri;
768   rpbc.final_uri = NULL;
769   rpbc.started = 0;
770 
771   if(rdf_parser->uri_filter)
772     raptor_www_set_uri_filter(rdf_parser->www, rdf_parser->uri_filter,
773                               rdf_parser->uri_filter_user_data);
774   else if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET))
775     raptor_www_set_uri_filter(rdf_parser->www,
776                               raptor_parser_set_uri_filter_no_net, rdf_parser);
777 
778   raptor_www_set_write_bytes_handler(rdf_parser->www,
779                                      raptor_parser_parse_uri_write_bytes,
780                                      &rpbc);
781 
782   raptor_www_set_content_type_handler(rdf_parser->www,
783                                       raptor_parser_parse_uri_content_type_handler,
784                                       rdf_parser);
785 
786   raptor_www_set_http_cache_control(rdf_parser->www,
787                                     RAPTOR_OPTIONS_GET_STRING(rdf_parser,
788                                                               RAPTOR_OPTION_WWW_HTTP_CACHE_CONTROL));
789 
790   ua = RAPTOR_OPTIONS_GET_STRING(rdf_parser, RAPTOR_OPTION_WWW_HTTP_USER_AGENT);
791   if(ua)
792     raptor_www_set_user_agent(rdf_parser->www, ua);
793 
794   cert_filename = RAPTOR_OPTIONS_GET_STRING(rdf_parser,
795                                             RAPTOR_OPTION_WWW_CERT_FILENAME);
796   cert_type = RAPTOR_OPTIONS_GET_STRING(rdf_parser,
797                                         RAPTOR_OPTION_WWW_CERT_TYPE);
798   cert_passphrase = RAPTOR_OPTIONS_GET_STRING(rdf_parser,
799                                               RAPTOR_OPTION_WWW_CERT_PASSPHRASE);
800   if(cert_filename || cert_type || cert_passphrase)
801     raptor_www_set_ssl_cert_options(rdf_parser->www, cert_filename,
802                                     cert_type, cert_passphrase);
803 
804   ssl_verify_peer = RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser,
805                                                RAPTOR_OPTION_WWW_SSL_VERIFY_PEER);
806   ssl_verify_host = RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser,
807                                                RAPTOR_OPTION_WWW_SSL_VERIFY_HOST);
808   raptor_www_set_ssl_verify_options(rdf_parser->www, ssl_verify_peer,
809                                     ssl_verify_host);
810 
811   ret = raptor_www_fetch(rdf_parser->www, uri);
812 
813   if(!rpbc.started && !ret)
814     ret = raptor_parser_parse_start(rdf_parser, base_uri);
815 
816   if(rpbc.final_uri)
817     raptor_free_uri(rpbc.final_uri);
818 
819   if(ret) {
820     raptor_free_www(rdf_parser->www);
821     rdf_parser->www = NULL;
822     return 1;
823   }
824 
825   if(raptor_parser_parse_chunk(rdf_parser, NULL, 0, 1))
826     rdf_parser->failed = 1;
827 
828   raptor_free_www(rdf_parser->www);
829   rdf_parser->www = NULL;
830 
831   return rdf_parser->failed;
832 }
833 
834 
835 /*
836  * raptor_parser_fatal_error - Fatal Error from a parser - Internal
837  */
838 void
raptor_parser_fatal_error(raptor_parser * parser,const char * message,...)839 raptor_parser_fatal_error(raptor_parser* parser, const char *message, ...)
840 {
841   va_list arguments;
842 
843   va_start(arguments, message);
844   if(parser) {
845     parser->failed = 1;
846     raptor_log_error_varargs(parser->world,
847                              RAPTOR_LOG_LEVEL_FATAL,
848                              &parser->locator,
849                              message, arguments);
850   } else
851     raptor_log_error_varargs(NULL,
852                              RAPTOR_LOG_LEVEL_FATAL, NULL,
853                              message, arguments);
854   va_end(arguments);
855 }
856 
857 
858 /*
859  * raptor_parser_error - Error from a parser - Internal
860  */
861 void
raptor_parser_error(raptor_parser * parser,const char * message,...)862 raptor_parser_error(raptor_parser* parser, const char *message, ...)
863 {
864   va_list arguments;
865 
866   va_start(arguments, message);
867 
868   raptor_parser_log_error_varargs(parser, RAPTOR_LOG_LEVEL_ERROR,
869                                   message, arguments);
870 
871   va_end(arguments);
872 }
873 
874 
875 /**
876  * raptor_parser_log_error_varargs:
877  * @parser: parser (or NULL)
878  * @level: log level
879  * @message: error format message
880  * @arguments: varargs for message
881  *
882  * Error from a parser - Internal.
883  */
884 void
raptor_parser_log_error_varargs(raptor_parser * parser,raptor_log_level level,const char * message,va_list arguments)885 raptor_parser_log_error_varargs(raptor_parser* parser,
886                                 raptor_log_level level,
887                                 const char *message, va_list arguments)
888 {
889   if(parser)
890     raptor_log_error_varargs(parser->world,
891                              level,
892                              &parser->locator,
893                              message, arguments);
894   else
895     raptor_log_error_varargs(NULL,
896                              level,
897                              NULL,
898                              message, arguments);
899 }
900 
901 
902 /*
903  * raptor_parser_warning - Warning from a parser - Internal
904  */
905 void
raptor_parser_warning(raptor_parser * parser,const char * message,...)906 raptor_parser_warning(raptor_parser* parser, const char *message, ...)
907 {
908   va_list arguments;
909 
910   va_start(arguments, message);
911 
912   if(parser)
913     raptor_log_error_varargs(parser->world,
914                              RAPTOR_LOG_LEVEL_WARN,
915                              &parser->locator,
916                              message, arguments);
917   else
918     raptor_log_error_varargs(NULL,
919                              RAPTOR_LOG_LEVEL_WARN,
920                              NULL,
921                              message, arguments);
922 
923   va_end(arguments);
924 }
925 
926 
927 
928 /* PUBLIC FUNCTIONS */
929 
930 /**
931  * raptor_parser_set_statement_handler:
932  * @parser: #raptor_parser parser object
933  * @user_data: user data pointer for callback
934  * @handler: new statement callback function
935  *
936  * Set the statement handler function for the parser.
937  *
938  * Use this to set the function to receive statements as the parsing
939  * proceeds. The statement argument to @handler is shared and must be
940  * copied by the caller with raptor_statement_copy().
941  **/
942 void
raptor_parser_set_statement_handler(raptor_parser * parser,void * user_data,raptor_statement_handler handler)943 raptor_parser_set_statement_handler(raptor_parser* parser,
944                                     void *user_data,
945                                     raptor_statement_handler handler)
946 {
947   parser->user_data = user_data;
948   parser->statement_handler = handler;
949 }
950 
951 
952 /**
953  * raptor_parser_set_graph_mark_handler:
954  * @parser: #raptor_parser parser object
955  * @user_data: user data pointer for callback
956  * @handler: new graph callback function
957  *
958  * Set the graph mark handler function for the parser.
959  *
960  * See #raptor_graph_mark_handler and #raptor_graph_mark_flags for
961  * the marks that may be returned by the handler.
962  *
963  **/
964 void
raptor_parser_set_graph_mark_handler(raptor_parser * parser,void * user_data,raptor_graph_mark_handler handler)965 raptor_parser_set_graph_mark_handler(raptor_parser* parser,
966                                      void *user_data,
967                                      raptor_graph_mark_handler handler)
968 {
969   parser->user_data = user_data;
970   parser->graph_mark_handler = handler;
971 }
972 
973 
974 /**
975  * raptor_parser_set_namespace_handler:
976  * @parser: #raptor_parser parser object
977  * @user_data: user data pointer for callback
978  * @handler: new namespace callback function
979  *
980  * Set the namespace handler function for the parser.
981  *
982  * When a prefix/namespace is seen in a parser, call the given
983  * @handler with the prefix string and the #raptor_uri namespace URI.
984  * Either can be NULL for the default prefix or default namespace.
985  *
986  * The handler function does not deal with duplicates so any
987  * namespace may be declared multiple times.
988  *
989  **/
990 void
raptor_parser_set_namespace_handler(raptor_parser * parser,void * user_data,raptor_namespace_handler handler)991 raptor_parser_set_namespace_handler(raptor_parser* parser,
992                                     void *user_data,
993                                     raptor_namespace_handler handler)
994 {
995   parser->namespace_handler = handler;
996   parser->namespace_handler_user_data = user_data;
997 }
998 
999 
1000 /**
1001  * raptor_parser_set_uri_filter:
1002  * @parser: parser object
1003  * @filter: URI filter function
1004  * @user_data: User data to pass to filter function
1005  *
1006  * Set URI filter function for WWW retrieval.
1007  **/
1008 void
raptor_parser_set_uri_filter(raptor_parser * parser,raptor_uri_filter_func filter,void * user_data)1009 raptor_parser_set_uri_filter(raptor_parser* parser,
1010                              raptor_uri_filter_func filter,
1011                              void *user_data)
1012 {
1013   parser->uri_filter = filter;
1014   parser->uri_filter_user_data = user_data;
1015 }
1016 
1017 
1018 /**
1019  * raptor_parser_set_option:
1020  * @parser: #raptor_parser parser object
1021  * @option: option to set from enumerated #raptor_option values
1022  * @string: string option value (or NULL)
1023  * @integer: integer option value
1024  *
1025  * Set parser option.
1026  *
1027  * If @string is not NULL and the option type is numeric, the string
1028  * value is converted to an integer and used in preference to @integer.
1029  *
1030  * If @string is NULL and the option type is not numeric, an error is
1031  * returned.
1032  *
1033  * The @string values used are copied.
1034  *
1035  * The allowed options are available via
1036  * raptor_world_get_option_description().
1037  *
1038  * Return value: non 0 on failure or if the option is unknown
1039  **/
1040 int
raptor_parser_set_option(raptor_parser * parser,raptor_option option,const char * string,int integer)1041 raptor_parser_set_option(raptor_parser *parser, raptor_option option,
1042                          const char* string, int integer)
1043 {
1044   int rc;
1045 
1046   rc = raptor_object_options_set_option(&parser->options, option,
1047                                         string, integer);
1048   if(option == RAPTOR_OPTION_STRICT && !rc) {
1049     int is_strict = RAPTOR_OPTIONS_GET_NUMERIC(parser, RAPTOR_OPTION_STRICT);
1050     raptor_parser_set_strict(parser, is_strict);
1051   }
1052 
1053   return rc;
1054 }
1055 
1056 
1057 /**
1058  * raptor_parser_get_option:
1059  * @parser: #raptor_parser parser object
1060  * @option: option to get value
1061  * @string_p: pointer to where to store string value
1062  * @integer_p: pointer to where to store integer value
1063  *
1064  * Get parser option.
1065  *
1066  * Any string value returned in *@string_p is shared and must
1067  * be copied by the caller.
1068  *
1069  * The allowed options are available via
1070  * raptor_world_get_option_description().
1071  *
1072  * Return value: option value or < 0 for an illegal option
1073  **/
1074 int
raptor_parser_get_option(raptor_parser * parser,raptor_option option,char ** string_p,int * integer_p)1075 raptor_parser_get_option(raptor_parser *parser, raptor_option option,
1076                          char** string_p, int* integer_p)
1077 {
1078   return raptor_object_options_get_option(&parser->options, option,
1079                                           string_p, integer_p);
1080 }
1081 
1082 
1083 /**
1084  * raptor_parser_set_strict:
1085  * @rdf_parser: #raptor_parser object
1086  * @is_strict: Non 0 for strict parsing
1087  *
1088  * INTERNAL - Set parser to strict / lax mode.
1089  *
1090  **/
1091 static void
raptor_parser_set_strict(raptor_parser * rdf_parser,int is_strict)1092 raptor_parser_set_strict(raptor_parser* rdf_parser, int is_strict)
1093 {
1094   is_strict = (is_strict) ? 1 : 0;
1095 
1096   /* Initialise default parser mode */
1097   RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_SCANNING, 0);
1098 
1099   RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_NON_NS_ATTRIBUTES, !is_strict);
1100   RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_OTHER_PARSETYPES, !is_strict);
1101   RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID, !is_strict);
1102   RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_RDF_TYPE_RDF_LIST, 0);
1103   RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_NORMALIZE_LANGUAGE, 1);
1104   RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL, is_strict);
1105   RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_WARN_OTHER_PARSETYPES, !is_strict);
1106   RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_CHECK_RDF_ID, 1);
1107   RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_HTML_TAG_SOUP, !is_strict);
1108   RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_MICROFORMATS, !is_strict);
1109   RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_HTML_LINK, !is_strict);
1110 }
1111 
1112 
1113 /**
1114  * raptor_parser_get_name:
1115  * @rdf_parser: #raptor_parser parser object
1116  *
1117  * Get the name of a parser.
1118  *
1119  * Use raptor_parser_get_description() to get the alternate names and
1120  * aliases as well as other descriptive values.
1121  *
1122  * Return value: the short name for the parser.
1123  **/
1124 const char*
raptor_parser_get_name(raptor_parser * rdf_parser)1125 raptor_parser_get_name(raptor_parser *rdf_parser)
1126 {
1127   if(rdf_parser->factory->get_name)
1128     return rdf_parser->factory->get_name(rdf_parser);
1129   else
1130     return rdf_parser->factory->desc.names[0];
1131 }
1132 
1133 
1134 /**
1135  * raptor_parser_get_description:
1136  * @rdf_parser: #raptor_parser parser object
1137  *
1138  * Get description of the syntaxes of the parser.
1139  *
1140  * The returned description is static and lives as long as the raptor
1141  * library (raptor world).
1142  *
1143  * Return value: description of syntax
1144  **/
1145 const raptor_syntax_description*
raptor_parser_get_description(raptor_parser * rdf_parser)1146 raptor_parser_get_description(raptor_parser *rdf_parser)
1147 {
1148   if(rdf_parser->factory->get_description)
1149     return rdf_parser->factory->get_description(rdf_parser);
1150   else
1151     return &rdf_parser->factory->desc;
1152 }
1153 
1154 
1155 
1156 /**
1157  * raptor_parser_parse_abort:
1158  * @rdf_parser: #raptor_parser parser object
1159  *
1160  * Abort an ongoing parsing.
1161  *
1162  * Causes any ongoing generation of statements by a parser to be
1163  * terminated and the parser to return controlto the application
1164  * as soon as draining any existing buffers.
1165  *
1166  * Most useful inside raptor_parser_parse_file() or
1167  * raptor_parser_parse_uri() when the Raptor library is directing the
1168  * parsing and when one of the callback handlers such as as set by
1169  * raptor_parser_set_statement_handler() requires to return to the main
1170  * application code.
1171  **/
1172 void
raptor_parser_parse_abort(raptor_parser * rdf_parser)1173 raptor_parser_parse_abort(raptor_parser *rdf_parser)
1174 {
1175   rdf_parser->failed = 1;
1176 }
1177 
1178 
1179 /**
1180  * raptor_parser_get_locator:
1181  * @rdf_parser: raptor parser
1182  *
1183  * Get the current raptor locator object.
1184  *
1185  * Return value: raptor locator
1186  **/
1187 raptor_locator*
raptor_parser_get_locator(raptor_parser * rdf_parser)1188 raptor_parser_get_locator(raptor_parser *rdf_parser)
1189 {
1190   if(rdf_parser->factory->get_locator)
1191     return rdf_parser->factory->get_locator(rdf_parser);
1192   else
1193     return &rdf_parser->locator;
1194 }
1195 
1196 
#ifdef RAPTOR_DEBUG
/*
 * raptor_stats_print:
 * @rdf_parser: parser
 * @stream: FILE to print to
 *
 * Debug builds only - print parser statistics.
 *
 * Output is only produced when the RDF/XML parser is compiled in,
 * RAPTOR_DEBUG is greater than 1 and the first name of the parser's
 * factory is "rdfxml"; otherwise the function does nothing.
 */
void
raptor_stats_print(raptor_parser *rdf_parser, FILE *stream)
{
#ifdef RAPTOR_PARSER_RDFXML
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  const char* parser_name = rdf_parser->factory->desc.names[0];

  if(strcmp(parser_name, "rdfxml") == 0) {
    fputs("raptor parser stats\n  ", stream);
    raptor_rdfxml_parser_stats_print((raptor_rdfxml_parser*)rdf_parser->context,
                                     stream);
  }
#endif
#endif
}
#endif
1212 
1213 
1214 struct syntax_score
1215 {
1216   int score;
1217   raptor_parser_factory* factory;
1218 };
1219 
1220 
1221 static int
compare_syntax_score(const void * a,const void * b)1222 compare_syntax_score(const void *a, const void *b) {
1223   return ((struct syntax_score*)b)->score - ((struct syntax_score*)a)->score;
1224 }
1225 
1226 #define RAPTOR_MIN_GUESS_SCORE 2
1227 
1228 /**
1229  * raptor_world_guess_parser_name:
1230  * @world: world object
1231  * @uri: URI identifying the syntax (or NULL)
1232  * @mime_type: mime type identifying the content (or NULL)
1233  * @buffer: buffer of content to guess (or NULL)
1234  * @len: length of buffer
1235  * @identifier: identifier of content (or NULL)
1236  *
1237  * Guess a parser name for content.
1238  *
1239  * Find a parser by scoring recognition of the syntax by a block of
1240  * characters, the content identifier or a mime type.  The content
1241  * identifier is typically a filename or URI or some other identifier.
1242  *
1243  * If the guessing finds only low scores, NULL will be returned.
1244  *
1245  * Return value: a parser name or NULL if no guess could be made
1246  **/
1247 const char*
raptor_world_guess_parser_name(raptor_world * world,raptor_uri * uri,const char * mime_type,const unsigned char * buffer,size_t len,const unsigned char * identifier)1248 raptor_world_guess_parser_name(raptor_world* world,
1249                                raptor_uri *uri, const char *mime_type,
1250                                const unsigned char *buffer, size_t len,
1251                                const unsigned char *identifier)
1252 {
1253   unsigned int i;
1254   raptor_parser_factory *factory;
1255   unsigned char *suffix = NULL;
1256   struct syntax_score* scores;
1257 
1258   RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, NULL);
1259 
1260   raptor_world_open(world);
1261 
1262   scores = RAPTOR_CALLOC(struct syntax_score*,
1263                          raptor_sequence_size(world->parsers),
1264                          sizeof(struct syntax_score));
1265   if(!scores)
1266     return NULL;
1267 
1268   if(identifier) {
1269     unsigned char *p = (unsigned char*)strrchr((const char*)identifier, '.');
1270     if(p) {
1271       unsigned char *from, *to;
1272 
1273       p++;
1274       suffix = RAPTOR_MALLOC(unsigned char*, strlen((const char*)p) + 1);
1275       if(!suffix) {
1276         RAPTOR_FREE(syntax_scores, scores);
1277         return NULL;
1278       }
1279 
1280       for(from = p, to = suffix; *from; ) {
1281         unsigned char c = *from++;
1282         /* discard the suffix if it wasn't '\.[a-zA-Z0-9]+$' */
1283         if(!isalpha(c) && !isdigit(c)) {
1284           RAPTOR_FREE(char*, suffix);
1285           suffix = NULL;
1286           to = NULL;
1287           break;
1288         }
1289         *to++ = isupper(c) ? (unsigned char)tolower(c): c;
1290       }
1291       if(to)
1292         *to = '\0';
1293     }
1294   }
1295 
1296   for(i = 0;
1297       (factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
1298       i++) {
1299     int score = -1;
1300     const raptor_type_q* type_q = NULL;
1301 
1302     if(mime_type && factory->desc.mime_types) {
1303       int j;
1304       type_q = NULL;
1305       for(j = 0;
1306           (type_q = &factory->desc.mime_types[j]) && type_q->mime_type;
1307           j++) {
1308         if(!strcmp(mime_type, type_q->mime_type))
1309           break;
1310       }
1311       /* got an exact match mime type - score it via the Q */
1312       if(type_q)
1313         score = type_q->q;
1314     }
1315     /* mime type match has high Q - return factory as result */
1316     if(score >= 10)
1317       break;
1318 
1319     if(uri && factory->desc.uri_strings) {
1320       int j;
1321       const char* uri_string = (const char*)raptor_uri_as_string(uri);
1322       const char* factory_uri_string = NULL;
1323 
1324       for(j = 0;
1325           (factory_uri_string = factory->desc.uri_strings[j]);
1326           j++) {
1327         if(!strcmp(uri_string, factory_uri_string))
1328           break;
1329       }
1330       if(factory_uri_string)
1331         /* got an exact match syntax for URI - return factory as result */
1332         break;
1333     }
1334 
1335     if(factory->recognise_syntax) {
1336       int c = -1;
1337 
1338       /* Only use first N bytes to avoid HTML documents that contain
1339        * RDF/XML examples
1340        */
1341 #define FIRSTN 1024
1342 #if FIRSTN > RAPTOR_READ_BUFFER_SIZE
1343 #error "RAPTOR_READ_BUFFER_SIZE is not large enough"
1344 #endif
1345       if(buffer && len && len > FIRSTN) {
1346         c = buffer[FIRSTN];
1347         ((char*)buffer)[FIRSTN] = '\0';
1348       }
1349 
1350       score += factory->recognise_syntax(factory, buffer, len,
1351                                          identifier, suffix,
1352                                          mime_type);
1353 
1354       if(c >= 0)
1355         ((char*)buffer)[FIRSTN] = c;
1356     }
1357 
1358     scores[i].score = score < 10 ? score : 10;
1359     scores[i].factory = factory;
1360 #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2
1361     RAPTOR_DEBUG3("Score %15s : %d\n", factory->desc.names[0], score);
1362 #endif
1363   }
1364 
1365   if(!factory) {
1366     /* sort the scores and pick a factory if score is good enough */
1367     qsort(scores, i, sizeof(struct syntax_score), compare_syntax_score);
1368 
1369     if(scores[0].score >= RAPTOR_MIN_GUESS_SCORE)
1370       factory = scores[0].factory;
1371   }
1372 
1373   if(suffix)
1374     RAPTOR_FREE(char*, suffix);
1375 
1376   RAPTOR_FREE(syntax_scores, scores);
1377 
1378   return factory ? factory->desc.names[0] : NULL;
1379 }
1380 
1381 
1382 /*
1383  * raptor_parser_copy_flags_state:
1384  * @to_parser: destination parser
1385  * @from_parser: source parser
1386  *
1387  * Copy status flags between parsers - INTERNAL.
1388  **/
1389 void
raptor_parser_copy_flags_state(raptor_parser * to_parser,raptor_parser * from_parser)1390 raptor_parser_copy_flags_state(raptor_parser *to_parser,
1391                                raptor_parser *from_parser)
1392 {
1393   to_parser->failed = from_parser->failed;
1394   to_parser->emit_graph_marks = from_parser->emit_graph_marks;
1395   to_parser->emitted_default_graph = from_parser->emitted_default_graph;
1396 }
1397 
1398 
1399 
1400 /*
1401  * raptor_parser_copy_user_state:
1402  * @to_parser: destination parser
1403  * @from_parser: source parser
1404  *
1405  * Copy user state between parsers - INTERNAL.
1406  *
1407  * Return value: non-0 on failure
1408  **/
1409 int
raptor_parser_copy_user_state(raptor_parser * to_parser,raptor_parser * from_parser)1410 raptor_parser_copy_user_state(raptor_parser *to_parser,
1411                               raptor_parser *from_parser)
1412 {
1413   int rc = 0;
1414 
1415   to_parser->user_data = from_parser->user_data;
1416   to_parser->statement_handler = from_parser->statement_handler;
1417   to_parser->namespace_handler = from_parser->namespace_handler;
1418   to_parser->namespace_handler_user_data = from_parser->namespace_handler_user_data;
1419   to_parser->uri_filter = from_parser->uri_filter;
1420   to_parser->uri_filter_user_data = from_parser->uri_filter_user_data;
1421 
1422   /* copy bit flags */
1423   raptor_parser_copy_flags_state(to_parser, from_parser);
1424 
1425   /* copy options */
1426   if(!rc)
1427     rc = raptor_object_options_copy_state(&to_parser->options,
1428                                           &from_parser->options);
1429 
1430   return rc;
1431 }
1432 
1433 
1434 /*
1435  * raptor_parser_start_namespace:
1436  * @rdf_parser: parser
1437  * @nspace: namespace starting
1438  *
1439  * Internal - Invoke start namespace handler
1440  **/
1441 void
raptor_parser_start_namespace(raptor_parser * rdf_parser,raptor_namespace * nspace)1442 raptor_parser_start_namespace(raptor_parser* rdf_parser,
1443                               raptor_namespace* nspace)
1444 {
1445   if(!rdf_parser->namespace_handler)
1446     return;
1447 
1448   (*rdf_parser->namespace_handler)(rdf_parser->namespace_handler_user_data,
1449                                    nspace);
1450 }
1451 
1452 
1453 /**
1454  * raptor_parser_get_accept_header:
1455  * @rdf_parser: parser
1456  *
1457  * Get an HTTP Accept value for the parser.
1458  *
1459  * The returned string must be freed by the caller such as with
1460  * raptor_free_memory().
1461  *
1462  * Return value: a new Accept: header string or NULL on failure
1463  **/
1464 const char*
raptor_parser_get_accept_header(raptor_parser * rdf_parser)1465 raptor_parser_get_accept_header(raptor_parser* rdf_parser)
1466 {
1467   raptor_parser_factory *factory = rdf_parser->factory;
1468   char *accept_header = NULL;
1469   size_t len;
1470   char *p;
1471   int i;
1472   const raptor_type_q* type_q;
1473 
1474   if(factory->accept_header)
1475     return factory->accept_header(rdf_parser);
1476 
1477   if(!factory->desc.mime_types)
1478     return NULL;
1479 
1480   len = 0;
1481   for(i = 0;
1482       (type_q = &factory->desc.mime_types[i]) && type_q->mime_type;
1483       i++) {
1484     len += type_q->mime_type_len + 2; /* ", " */
1485     if(type_q->q < 10)
1486       len += 6; /* ";q=X.Y" */
1487   }
1488 
1489   /* 9 = strlen("\*\/\*;q=0.1") */
1490 #define ACCEPT_HEADER_LEN 9
1491   accept_header = RAPTOR_MALLOC(char*, len + ACCEPT_HEADER_LEN + 1);
1492   if(!accept_header)
1493     return NULL;
1494 
1495   p = accept_header;
1496   for(i = 0;
1497       (type_q = &factory->desc.mime_types[i]) && type_q->mime_type;
1498       i++) {
1499     memcpy(p, type_q->mime_type, type_q->mime_type_len);
1500     p += type_q->mime_type_len;
1501     if(type_q->q < 10) {
1502       *p++ = ';';
1503       *p++ = 'q';
1504       *p++ = '=';
1505       *p++ = '0';
1506       *p++ = '.';
1507       *p++ = '0' + (type_q->q);
1508     }
1509 
1510     *p++ = ',';
1511     *p++ = ' ';
1512   }
1513 
1514   memcpy(p, "*/*;q=0.1", ACCEPT_HEADER_LEN + 1);
1515 
1516   return accept_header;
1517 }
1518 
1519 
1520 const char*
raptor_parser_get_accept_header_all(raptor_world * world)1521 raptor_parser_get_accept_header_all(raptor_world* world)
1522 {
1523   raptor_parser_factory *factory;
1524   char *accept_header = NULL;
1525   size_t len;
1526   char *p;
1527   int i;
1528 
1529   len = 0;
1530   for(i = 0;
1531       (factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
1532       i++) {
1533     const raptor_type_q* type_q;
1534     int j;
1535 
1536     for(j = 0;
1537         (type_q = &factory->desc.mime_types[j]) && type_q->mime_type;
1538         j++) {
1539       len += type_q->mime_type_len + 2; /* ", " */
1540       if(type_q->q < 10)
1541         len += 6; /* ";q=X.Y" */
1542     }
1543   }
1544 
1545   /* 9 = strlen("\*\/\*;q=0.1") */
1546 #define ACCEPT_HEADER_LEN 9
1547   accept_header = RAPTOR_MALLOC(char*, len + ACCEPT_HEADER_LEN + 1);
1548   if(!accept_header)
1549     return NULL;
1550 
1551   p = accept_header;
1552   for(i = 0;
1553       (factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
1554       i++) {
1555     const raptor_type_q* type_q;
1556     int j;
1557 
1558     for(j = 0;
1559         (type_q = &factory->desc.mime_types[j]) && type_q->mime_type;
1560         j++) {
1561       memcpy(p, type_q->mime_type, type_q->mime_type_len);
1562       p+= type_q->mime_type_len;
1563       if(type_q->q < 10) {
1564         *p++ = ';';
1565         *p++ = 'q';
1566         *p++ = '=';
1567         *p++ = '0';
1568         *p++ = '.';
1569         *p++ = '0' + (type_q->q);
1570       }
1571 
1572       *p++ = ',';
1573       *p++ = ' ';
1574     }
1575 
1576   }
1577 
1578   memcpy(p, "*/*;q=0.1", ACCEPT_HEADER_LEN + 1);
1579 
1580   return accept_header;
1581 }
1582 
1583 
1584 void
raptor_parser_save_content(raptor_parser * rdf_parser,int save)1585 raptor_parser_save_content(raptor_parser* rdf_parser, int save)
1586 {
1587   if(rdf_parser->sb)
1588     raptor_free_stringbuffer(rdf_parser->sb);
1589 
1590   rdf_parser->sb= save ? raptor_new_stringbuffer() : NULL;
1591 }
1592 
1593 
1594 const unsigned char*
raptor_parser_get_content(raptor_parser * rdf_parser,size_t * length_p)1595 raptor_parser_get_content(raptor_parser* rdf_parser, size_t* length_p)
1596 {
1597   unsigned char* buffer;
1598   size_t len;
1599 
1600   if(!rdf_parser->sb)
1601     return NULL;
1602 
1603   len = raptor_stringbuffer_length(rdf_parser->sb);
1604   buffer = RAPTOR_MALLOC(unsigned char*, len + 1);
1605   if(!buffer)
1606     return NULL;
1607 
1608   raptor_stringbuffer_copy_to_string(rdf_parser->sb, buffer, len);
1609 
1610   if(length_p)
1611     *length_p=len;
1612 
1613   return buffer;
1614 }
1615 
1616 
1617 void
raptor_parser_start_graph(raptor_parser * parser,raptor_uri * uri,int is_declared)1618 raptor_parser_start_graph(raptor_parser* parser, raptor_uri* uri,
1619                           int is_declared)
1620 {
1621   int flags = RAPTOR_GRAPH_MARK_START;
1622   if(is_declared)
1623     flags |= RAPTOR_GRAPH_MARK_DECLARED;
1624 
1625   if(!parser->emit_graph_marks)
1626     return;
1627 
1628   if(parser->graph_mark_handler)
1629     (*parser->graph_mark_handler)(parser->user_data, uri, flags);
1630 }
1631 
1632 
1633 void
raptor_parser_end_graph(raptor_parser * parser,raptor_uri * uri,int is_declared)1634 raptor_parser_end_graph(raptor_parser* parser, raptor_uri* uri, int is_declared)
1635 {
1636   int flags = 0;
1637   if(is_declared)
1638     flags |= RAPTOR_GRAPH_MARK_DECLARED;
1639 
1640   if(!parser->emit_graph_marks)
1641     return;
1642 
1643   if(parser->graph_mark_handler)
1644     (*parser->graph_mark_handler)(parser->user_data, uri, flags);
1645 }
1646 
1647 
1648 /**
1649  * raptor_parser_get_world:
1650  * @rdf_parser: parser
1651  *
1652  * Get the #raptor_world object associated with a parser.
1653  *
1654  * Return value: raptor_world* pointer
1655  **/
1656 raptor_world *
raptor_parser_get_world(raptor_parser * rdf_parser)1657 raptor_parser_get_world(raptor_parser* rdf_parser)
1658 {
1659   return rdf_parser->world;
1660 }
1661 
1662 
1663 /**
1664  * raptor_parser_get_graph:
1665  * @rdf_parser: parser
1666  *
1667  * Get the current graph for the parser
1668  *
1669  * The returned URI is owned by the caller and must be freed with
1670  * raptor_free_uri()
1671  *
1672  * Return value: raptor_uri* graph name or NULL for the default graph
1673  **/
1674 raptor_uri*
raptor_parser_get_graph(raptor_parser * rdf_parser)1675 raptor_parser_get_graph(raptor_parser* rdf_parser)
1676 {
1677   if(rdf_parser->factory->get_graph)
1678     return rdf_parser->factory->get_graph(rdf_parser);
1679   return NULL;
1680 }
1681 
1682 
1683 /**
1684  * raptor_parser_parse_iostream:
1685  * @rdf_parser: parser
1686  * @iostr: iostream to read from
1687  * @base_uri: the base URI to use (or NULL)
1688  *
1689  * Parse content from an iostream
1690  *
1691  * If the parser requires a base URI and @base_uri is NULL, an error
1692  * will be generated and the function will fail.
1693  *
1694  * Return value: non 0 on failure, <0 if a required base URI was missing
1695  **/
1696 int
raptor_parser_parse_iostream(raptor_parser * rdf_parser,raptor_iostream * iostr,raptor_uri * base_uri)1697 raptor_parser_parse_iostream(raptor_parser* rdf_parser, raptor_iostream *iostr,
1698                              raptor_uri *base_uri)
1699 {
1700   int rc = 0;
1701 
1702   RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(rdf_parser, raptor_parser, 1);
1703   RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(iostr, raptor_iostr, 1);
1704 
1705   rc = raptor_parser_parse_start(rdf_parser, base_uri);
1706   if(rc)
1707     return rc;
1708 
1709   while(!raptor_iostream_read_eof(iostr)) {
1710     int ilen;
1711     size_t len;
1712     int is_end;
1713 
1714     ilen = raptor_iostream_read_bytes(rdf_parser->buffer, 1,
1715                                       RAPTOR_READ_BUFFER_SIZE, iostr);
1716     if(ilen < 0)
1717       break;
1718     len = RAPTOR_GOOD_CAST(size_t, ilen);
1719     is_end = (len < RAPTOR_READ_BUFFER_SIZE);
1720 
1721     rc = raptor_parser_parse_chunk(rdf_parser, rdf_parser->buffer, len, is_end);
1722     if(rc || is_end)
1723       break;
1724   }
1725 
1726   return rc;
1727 }
1728 
1729 
1730 /* end not STANDALONE */
1731 #endif
1732 
1733 
1734 #ifdef STANDALONE
1735 #include <stdio.h>
1736 
1737 int main(int argc, char *argv[]);
1738 
1739 
1740 int
main(int argc,char * argv[])1741 main(int argc, char *argv[])
1742 {
1743   raptor_world *world;
1744   const char *program = raptor_basename(argv[0]);
1745   int i;
1746   const char *s;
1747 
1748   world = raptor_new_world();
1749   if(!world || raptor_world_open(world))
1750     exit(1);
1751 
1752 #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
1753   fprintf(stderr, "%s: Known options:\n", program);
1754 #endif
1755 
1756   for(i = 0; i <= (int)raptor_option_get_count(); i++) {
1757     raptor_option_description *od;
1758     int fn;
1759 
1760     od = raptor_world_get_option_description(world,
1761                                              RAPTOR_DOMAIN_PARSER,
1762                                              (raptor_option)i);
1763     if(!od)
1764       continue;
1765 
1766 #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
1767     fprintf(stderr, " %2d %-20s %s <%s>\n", i, od->name, od->label,
1768             (od->uri ? (const char*)raptor_uri_as_string(od->uri) : ""));
1769 #endif
1770     fn = raptor_world_get_option_from_uri(world, od->uri);
1771     if(fn != i) {
1772       fprintf(stderr,
1773               "%s: raptor_option_from_uri() returned %d expected %d\n",
1774               program, fn, i);
1775       return 1;
1776     }
1777     raptor_free_option_description(od);
1778   }
1779 
1780   s = raptor_parser_get_accept_header_all(world);
1781 #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
1782   fprintf(stderr, "Default HTTP accept header: '%s'\n", s);
1783 #endif
1784   if(!s) {
1785     fprintf(stderr, "%s: raptor_parser_get_accept_header_all() failed\n",
1786             program);
1787     return 1;
1788   }
1789   RAPTOR_FREE(char*, s);
1790 
1791   raptor_free_world(world);
1792 
1793   return 0;
1794 }
1795 
1796 #endif
1797