1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * raptor_parse.c - Raptor Parser API
4  *
5  * Copyright (C) 2000-2009, David Beckett http://www.dajobe.org/
6  * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
7  *
8  * This package is Free Software and part of Redland http://librdf.org/
9  *
10  * It is licensed under the following three licenses as alternatives:
11  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
12  *   2. GNU General Public License (GPL) V2 or any newer version
13  *   3. Apache License, V2.0 or any newer version
14  *
15  * You may not use this file except in compliance with at least one of
16  * the above three licenses.
17  *
18  * See LICENSE.html or LICENSE.txt at the top of this package for the
19  * complete terms and further detail along with the license texts for
20  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
21  *
22  *
23  */
24 
25 
26 #ifdef HAVE_CONFIG_H
27 #include <raptor_config.h>
28 #endif
29 
30 #ifdef WIN32
31 #include <win32_raptor_config.h>
32 #endif
33 
34 
35 #include <stdio.h>
36 #include <string.h>
37 #include <ctype.h>
38 #include <stdarg.h>
39 #ifdef HAVE_ERRNO_H
40 #include <errno.h>
41 #endif
42 #ifdef HAVE_STDLIB_H
43 #include <stdlib.h>
44 #endif
45 #ifdef HAVE_SYS_STAT_H
46 #include <sys/stat.h>
47 #endif
48 #ifdef HAVE_FCNTL_H
49 #include <fcntl.h>
50 #endif
51 
52 /* Raptor includes */
53 #include "raptor.h"
54 #include "raptor_internal.h"
55 
56 
57 #ifndef STANDALONE
58 
59 /* prototypes for helper functions */
60 static void raptor_free_type_q(raptor_type_q* type_q);
61 
62 
63 /* helper methods */
64 
65 static void
raptor_free_parser_factory(raptor_parser_factory * factory)66 raptor_free_parser_factory(raptor_parser_factory* factory)
67 {
68   RAPTOR_ASSERT_OBJECT_POINTER_RETURN(factory, raptor_parser_factory);
69 
70   if(factory->finish_factory)
71     factory->finish_factory(factory);
72 
73   if(factory->name)
74     RAPTOR_FREE(raptor_parser_factory, (void*)factory->name);
75   if(factory->label)
76     RAPTOR_FREE(raptor_parser_factory, (void*)factory->label);
77   if(factory->alias)
78     RAPTOR_FREE(raptor_parser_factory, (void*)factory->alias);
79   if(factory->mime_types)
80     raptor_free_sequence(factory->mime_types);
81   if(factory->uri_string)
82     RAPTOR_FREE(raptor_parser_factory, (void*)factory->uri_string);
83 
84   RAPTOR_FREE(raptor_parser_factory, factory);
85 }
86 
87 
88 /* class methods */
89 
90 int
raptor_parsers_init(raptor_world * world)91 raptor_parsers_init(raptor_world *world)
92 {
93   int rc=0;
94 
95   world->parsers=raptor_new_sequence((raptor_sequence_free_handler *)raptor_free_parser_factory, NULL);
96   if(!world->parsers)
97     return 1;
98 
99 #ifdef RAPTOR_PARSER_RDFXML
100   rc+= raptor_init_parser_rdfxml(world) != 0;
101 #endif
102 
103 #ifdef RAPTOR_PARSER_NTRIPLES
104   rc+= raptor_init_parser_ntriples(world) != 0;
105 #endif
106 
107 #ifdef RAPTOR_PARSER_N3
108   rc+= raptor_init_parser_n3(world) != 0;
109 #endif
110 
111 #ifdef RAPTOR_PARSER_TURTLE
112   rc+= raptor_init_parser_turtle(world) != 0;
113 #endif
114 
115 #ifdef RAPTOR_PARSER_TRIG
116   rc+= raptor_init_parser_trig(world) != 0;
117 #endif
118 
119 #ifdef RAPTOR_PARSER_RSS
120   rc+= raptor_init_parser_rss(world) != 0;
121 #endif
122 
123 #if defined(RAPTOR_PARSER_GRDDL)
124   rc+= raptor_init_parser_grddl_common(world) != 0;
125 
126 #ifdef RAPTOR_PARSER_GRDDL
127   rc+= raptor_init_parser_grddl(world) != 0;
128 #endif
129 
130 #endif
131 
132 #ifdef RAPTOR_PARSER_GUESS
133   rc+= raptor_init_parser_guess(world) != 0;
134 #endif
135 
136 #ifdef RAPTOR_PARSER_RDFA
137   rc+= raptor_init_parser_rdfa(world) != 0;
138 #endif
139 
140   return rc;
141 }
142 
143 
144 /*
145  * raptor_finish_parsers - delete all the registered parsers
146  */
147 void
raptor_parsers_finish(raptor_world * world)148 raptor_parsers_finish(raptor_world *world)
149 {
150   if(world->parsers) {
151     raptor_free_sequence(world->parsers);
152     world->parsers=NULL;
153   }
154 #if defined(RAPTOR_PARSER_GRDDL)
155   raptor_terminate_parser_grddl_common(world);
156 #endif
157 }
158 
159 
160 /*
161  * raptor_parser_register_factory:
162  * @name: the short syntax name
163  * @label: readable label for syntax
164  * @mime_type: MIME type of the syntax handled by the parser (or NULL)
165  * @uri_string: URI string of the syntax (or NULL)
166  * @factory: pointer to function to call to register the factory
167  *
168  * Register a syntax handled by a parser factory.
169  *
170  * INTERNAL
171  *
172  **/
173 RAPTOR_EXTERN_C
174 raptor_parser_factory*
raptor_parser_register_factory(raptor_world * world,const char * name,const char * label,int (* factory)(raptor_parser_factory *))175 raptor_parser_register_factory(raptor_world* world,
176                                const char *name, const char *label,
177                                int (*factory) (raptor_parser_factory*))
178 {
179   raptor_parser_factory *parser=NULL;
180   raptor_parser_factory *h;
181   char *name_copy, *label_copy;
182   int i;
183 
184 #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
185   RAPTOR_DEBUG3("Received registration for syntax %s '%s'\n", name, label);
186 #endif
187 
188   for(i=0;
189       (h=(raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
190       i++) {
191     if(!strcmp(h->name, name)) {
192       RAPTOR_DEBUG2("parser %s already registered\n", h->name);
193       return NULL;
194     }
195   }
196 
197   parser=(raptor_parser_factory*)RAPTOR_CALLOC(raptor_parser_factory, 1,
198                                                sizeof(raptor_parser_factory));
199   if(!parser)
200     return NULL;
201 
202   parser->world=world;
203 
204   name_copy=(char*)RAPTOR_CALLOC(cstring, strlen(name)+1, 1);
205   if(!name_copy)
206     goto tidy;
207   strcpy(name_copy, name);
208   parser->name=name_copy;
209 
210   label_copy=(char*)RAPTOR_CALLOC(cstring, strlen(label)+1, 1);
211   if(!label_copy)
212     goto tidy;
213   strcpy(label_copy, label);
214   parser->label=label_copy;
215 
216   parser->mime_types=raptor_new_sequence((raptor_sequence_free_handler*)raptor_free_type_q, NULL);
217   if(!parser->mime_types)
218     goto tidy;
219 
220   if(raptor_sequence_push(world->parsers, parser))
221     return NULL; /* on error, parser is already freed by the sequence */
222 
223   /* Call the parser registration function on the new object */
224   if (factory(parser))
225     return NULL; /* parser is owned and freed by the parsers sequence */
226 
227 #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
228   RAPTOR_DEBUG3("%s has context size %d\n", name, parser->context_length);
229 #endif
230 
231   return parser;
232 
233   /* Clean up on failure */
234   tidy:
235   raptor_free_parser_factory(parser);
236   return NULL;
237 }
238 
239 
240 int
raptor_parser_factory_add_alias(raptor_parser_factory * factory,const char * alias)241 raptor_parser_factory_add_alias(raptor_parser_factory* factory,
242                                 const char *alias)
243 {
244   raptor_parser_factory *p;
245   char *alias_copy;
246   int i;
247 
248   for(i=0;
249       (p=(raptor_parser_factory*)raptor_sequence_get_at(factory->world->parsers, i));
250       i++) {
251     if(!strcmp(p->name, alias)) {
252       RAPTOR_DEBUG2("parser %s already registered\n", p->name);
253       return 1;
254     }
255   }
256 
257   alias_copy=(char*)RAPTOR_CALLOC(cstring, strlen(alias)+1, 1);
258   if(!alias_copy)
259     return 1;
260   strcpy(alias_copy, alias);
261   factory->alias=alias_copy;
262 
263   return 0;
264 }
265 
266 
267 static void
raptor_free_type_q(raptor_type_q * type_q)268 raptor_free_type_q(raptor_type_q* type_q)
269 {
270   RAPTOR_FREE(cstring, (void*)type_q->mime_type);
271   RAPTOR_FREE(raptor_type_q, (void*)type_q);
272 }
273 
274 
275 /**
276  * raptor_parser_factory_add_mime_type:
277  * @factory: Raptor parser factory
278  * @mime_type: MIME Type string
279  * @q: Accept 'Q' value 0 to 10 inclusive representing 0.0 to 1.0
280  *
281  * Register a MIME type as handled by a factory.
282  *
283  * The FIRST added MIME type is the default or main one reported.
284  *
285  * Return value: non-0 on failure
286  *
287  **/
288 int
raptor_parser_factory_add_mime_type(raptor_parser_factory * factory,const char * mime_type,int q)289 raptor_parser_factory_add_mime_type(raptor_parser_factory* factory,
290                                     const char* mime_type, int q)
291 {
292   raptor_type_q* type_q;
293   char* mime_type_copy;
294   size_t len;
295 
296   type_q=(raptor_type_q*)RAPTOR_CALLOC(raptor_type_q, sizeof(raptor_type_q), 1);
297   if(!type_q)
298     return 1;
299   len=strlen(mime_type);
300   mime_type_copy=(char*)RAPTOR_CALLOC(cstring, len+1, 1);
301   if(!mime_type_copy) {
302     raptor_free_type_q(type_q);
303     return 1;
304   }
305   strcpy(mime_type_copy, mime_type);
306 
307   type_q->mime_type=mime_type_copy;
308   type_q->mime_type_len=len;
309 
310   if(q<0)
311     q=0;
312   if(q>10)
313     q=10;
314   type_q->q=q;
315 
316   return raptor_sequence_push(factory->mime_types, type_q);
317 }
318 
319 
320 /**
321  * raptor_parser_factory_add_uri:
322  * @factory: Raptor parser factory
323  * @uri_string: URI string
324  *
325  * Register an identifying URI as handled by a factory.
326  *
327  * Return value: non-0 on failure
328  **/
329 int
raptor_parser_factory_add_uri(raptor_parser_factory * factory,const unsigned char * uri_string)330 raptor_parser_factory_add_uri(raptor_parser_factory* factory,
331                               const unsigned char *uri_string)
332 {
333   unsigned char *uri_string_copy;
334 
335   if(!uri_string)
336     return 1;
337 
338   uri_string_copy=(unsigned char*)RAPTOR_CALLOC(cstring, strlen((const char*)uri_string)+1, 1);
339   if(!uri_string_copy)
340     return 1;
341 
342   strcpy((char*)uri_string_copy, (const char*)uri_string);
343   factory->uri_string=uri_string_copy;
344 
345   return 0;
346 }
347 
348 
349 /**
350  * raptor_get_parser_factory:
351  * @world: raptor_world object
352  * @name: the factory name or NULL for the default factory
353  *
354  * Get a parser factory by name.
355  *
356  * Return value: the factory object or NULL if there is no such factory
357  **/
358 raptor_parser_factory*
raptor_get_parser_factory(raptor_world * world,const char * name)359 raptor_get_parser_factory(raptor_world *world, const char *name)
360 {
361   raptor_parser_factory *factory;
362 
363   /* return 1st parser if no particular one wanted - why? */
364   if(!name) {
365     factory=(raptor_parser_factory *)raptor_sequence_get_at(world->parsers, 0);
366     if(!factory) {
367       RAPTOR_DEBUG1("No (default) parsers registered\n");
368       return NULL;
369     }
370   } else {
371     int i;
372 
373     for(i=0;
374         (factory=(raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
375         i++) {
376       if(!strcmp(factory->name, name) ||
377          (factory->alias && !strcmp(factory->alias, name)))
378         break;
379     }
380     /* else FACTORY name not found */
381     if(!factory) {
382       RAPTOR_DEBUG2("No parser with name %s found\n", name);
383       return NULL;
384     }
385   }
386 
387   return factory;
388 }
389 
390 
#ifndef RAPTOR_DISABLE_V1
/**
 * raptor_syntaxes_enumerate:
 * @counter: index into the list of syntaxes
 * @name: pointer to store the name of the syntax (or NULL)
 * @label: pointer to store syntax readable label (or NULL)
 * @mime_type: pointer to store syntax MIME Type (or NULL)
 * @uri_string: pointer to store syntax URI string (or NULL)
 *
 * Get information on syntaxes.
 *
 * Forwards to raptor_syntaxes_enumerate_v2() using the world returned
 * by raptor_world_instance().
 *
 * raptor_init() MUST have been called before calling this function.
 * Use raptor_syntaxes_enumerate_v2() if using raptor_world APIs.
 *
 * Return value: non 0 on failure or if counter is out of range
 **/
int
raptor_syntaxes_enumerate(const unsigned int counter,
                          const char **name, const char **label,
                          const char **mime_type,
                          const unsigned char **uri_string)
{
  int status;

  status = raptor_syntaxes_enumerate_v2(raptor_world_instance(),
                                        counter,
                                        name, label,
                                        mime_type, uri_string);
  return status;
}
#endif
417 
418 
419 /**
420  * raptor_syntaxes_enumerate_v2:
421  * @world: raptor_world object
422  * @counter: index into the list of syntaxes
423  * @name: pointer to store the name of the syntax (or NULL)
424  * @label: pointer to store syntax readable label (or NULL)
425  * @mime_type: pointer to store syntax MIME Type (or NULL)
426  * @uri_string: pointer to store syntax URI string (or NULL)
427  *
428  * Get information on syntaxes.
429  *
430  * Return value: non 0 on failure of if counter is out of range
431  **/
432 int
raptor_syntaxes_enumerate_v2(raptor_world * world,const unsigned int counter,const char ** name,const char ** label,const char ** mime_type,const unsigned char ** uri_string)433 raptor_syntaxes_enumerate_v2(raptor_world* world,
434                              const unsigned int counter,
435                              const char **name, const char **label,
436                              const char **mime_type,
437                              const unsigned char **uri_string)
438 {
439   raptor_parser_factory *factory;
440 
441   factory=(raptor_parser_factory*)raptor_sequence_get_at(world->parsers,
442                                                          counter);
443 
444   if(!factory)
445     return 1;
446 
447   if(name)
448     *name=factory->name;
449   if(label)
450     *label=factory->label;
451   if(mime_type) {
452     const char *mime_type_t=NULL;
453     if(factory->mime_types) {
454       raptor_type_q* tq;
455       tq=(raptor_type_q*)raptor_sequence_get_at(factory->mime_types, 0);
456       if(tq)
457         mime_type_t=tq->mime_type;
458     }
459     *mime_type=mime_type_t;
460   }
461   if(uri_string)
462     *uri_string=factory->uri_string;
463   return 0;
464 }
465 
466 
467 #ifndef RAPTOR_DISABLE_V1
/**
 * raptor_parsers_enumerate:
 * @counter: index to list of parsers
 * @name: pointer to store syntax name (or NULL)
 * @label: pointer to store syntax label (or NULL)
 *
 * Get list of syntax parsers.
 *
 * Same as raptor_syntaxes_enumerate() without asking for the MIME type
 * or the URI string.
 *
 * Return value: non 0 on failure or if counter is out of range
 **/
int
raptor_parsers_enumerate(const unsigned int counter,
                         const char **name, const char **label)
{
  int status;

  /* MIME type and URI string are not wanted here */
  status = raptor_syntaxes_enumerate(counter, name, label, NULL, NULL);

  return status;
}
484 
485 
/**
 * raptor_syntax_name_check:
 * @name: the syntax name
 *
 * Check name of a parser.
 *
 * Forwards to raptor_syntax_name_check_v2() using the world returned
 * by raptor_world_instance().
 *
 * raptor_init() MUST have been called before calling this function.
 * Use raptor_syntax_name_check_v2() if using raptor_world APIs.
 *
 * Return value: non 0 if name is a known syntax name
 */
int
raptor_syntax_name_check(const char *name)
{
  int known;

  known = raptor_syntax_name_check_v2(raptor_world_instance(), name);

  return known;
}
501 #endif
502 
503 
504 /**
505  * raptor_syntax_name_check_v2:
506  * @world: raptor_world object
507  * @name: the syntax name
508  *
509  * Check name of a parser.
510  *
511  * Return value: non 0 if name is a known syntax name
512  */
513 int
raptor_syntax_name_check_v2(raptor_world * world,const char * name)514 raptor_syntax_name_check_v2(raptor_world* world, const char *name) {
515   return (raptor_get_parser_factory(world, name) != NULL);
516 }
517 
518 
519 #ifndef RAPTOR_DISABLE_V1
520 /**
521  * raptor_new_parser:
522  * @name: the parser name
523  *
524  * Constructor - create a new raptor_parser object.
525  *
526  * raptor_init() MUST have been called before calling this function.
527  * Use raptor_new_parser_v2() if using raptor_world APIs.
528  *
529  * Return value: a new #raptor_parser object or NULL on failure
530  */
531 raptor_parser*
raptor_new_parser(const char * name)532 raptor_new_parser(const char *name) {
533   return raptor_new_parser_v2(raptor_world_instance(), name);
534 }
535 #endif
536 
537 
538 /**
539  * raptor_new_parser_v2:
540  * @world: raptor_world object
541  * @name: the parser name
542  *
543  * Constructor - create a new raptor_parser object.
544  *
545  * Return value: a new #raptor_parser object or NULL on failure
546  */
547 raptor_parser*
raptor_new_parser_v2(raptor_world * world,const char * name)548 raptor_new_parser_v2(raptor_world* world, const char *name) {
549   raptor_parser_factory* factory;
550   raptor_parser* rdf_parser;
551 
552   factory=raptor_get_parser_factory(world, name);
553   if(!factory)
554     return NULL;
555 
556   rdf_parser=(raptor_parser*)RAPTOR_CALLOC(raptor_parser, 1,
557                                            sizeof(raptor_parser));
558   if(!rdf_parser)
559     return NULL;
560 
561   rdf_parser->world=world;
562 
563   rdf_parser->context=(char*)RAPTOR_CALLOC(raptor_parser_context, 1,
564                                            factory->context_length);
565   if(!rdf_parser->context) {
566     raptor_free_parser(rdf_parser);
567     return NULL;
568   }
569 
570 #ifdef RAPTOR_XML_LIBXML
571   rdf_parser->magic=RAPTOR_LIBXML_MAGIC;
572 #endif
573   rdf_parser->factory=factory;
574 
575   rdf_parser->failed=0;
576 
577   rdf_parser->error_handlers.locator=&rdf_parser->locator;
578   rdf_parser->error_handlers.last_log_level=RAPTOR_LOG_LEVEL_LAST;
579   raptor_error_handlers_init_v2(rdf_parser->world, &rdf_parser->error_handlers);
580 
581   /* Initialise default (lax) feature values */
582   raptor_set_parser_strict(rdf_parser, 0);
583 
584   if(factory->init(rdf_parser, name)) {
585     raptor_free_parser(rdf_parser);
586     return NULL;
587   }
588 
589   return rdf_parser;
590 }
591 
592 
593 #ifndef RAPTOR_DISABLE_V1
594 /**
595  * raptor_new_parser_for_content:
596  * @uri: URI identifying the syntax (or NULL)
597  * @mime_type: mime type identifying the content (or NULL)
598  * @buffer: buffer of content to guess (or NULL)
599  * @len: length of buffer
600  * @identifier: identifier of content (or NULL)
601  *
602  * Constructor - create a new raptor_parser.
603  *
604  * Uses raptor_guess_parser_name() to find a parser by scoring
605  * recognition of the syntax by a block of characters, the content
606  * identifier or a mime type.  The content identifier is typically a
607  * filename or URI or some other identifier.
608  *
609  * raptor_init() MUST have been called before calling this function.
610  * Use raptor_new_parser_for_content_v2() if using raptor_world APIs.
611  *
612  * Return value: a new #raptor_parser object or NULL on failure
613  **/
614 raptor_parser*
raptor_new_parser_for_content(raptor_uri * uri,const char * mime_type,const unsigned char * buffer,size_t len,const unsigned char * identifier)615 raptor_new_parser_for_content(raptor_uri *uri, const char *mime_type,
616                               const unsigned char *buffer, size_t len,
617                               const unsigned char *identifier)
618 {
619   return raptor_new_parser_for_content_v2(raptor_world_instance(),
620                                           uri, mime_type,
621                                           buffer, len,
622                                           identifier);
623 }
624 #endif
625 
626 
627 /**
628  * raptor_new_parser_for_content_v2:
629  * @world: raptor_world object
630  * @uri: URI identifying the syntax (or NULL)
631  * @mime_type: mime type identifying the content (or NULL)
632  * @buffer: buffer of content to guess (or NULL)
633  * @len: length of buffer
634  * @identifier: identifier of content (or NULL)
635  *
636  * Constructor - create a new raptor_parser.
637  *
638  * Uses raptor_guess_parser_name() to find a parser by scoring
639  * recognition of the syntax by a block of characters, the content
640  * identifier or a mime type.  The content identifier is typically a
641  * filename or URI or some other identifier.
642  *
643  * Return value: a new #raptor_parser object or NULL on failure
644  **/
645 raptor_parser*
raptor_new_parser_for_content_v2(raptor_world * world,raptor_uri * uri,const char * mime_type,const unsigned char * buffer,size_t len,const unsigned char * identifier)646 raptor_new_parser_for_content_v2(raptor_world* world,
647                                  raptor_uri *uri, const char *mime_type,
648                                  const unsigned char *buffer, size_t len,
649                                  const unsigned char *identifier)
650 {
651   return raptor_new_parser_v2(world,
652                               raptor_guess_parser_name_v2(world, uri, mime_type, buffer, len, identifier));
653 }
654 
655 
656 /**
657  * raptor_start_parse:
658  * @rdf_parser: RDF parser
659  * @uri: base URI or may be NULL if no base URI is required
660  *
661  * Start a parse of content with base URI.
662  *
663  * Parsers that need a base URI can be tested with raptor_get_need_base_uri().
664  *
665  * Return value: non-0 on failure, <0 if a required base URI was missing
666  **/
667 int
raptor_start_parse(raptor_parser * rdf_parser,raptor_uri * uri)668 raptor_start_parse(raptor_parser *rdf_parser, raptor_uri *uri)
669 {
670   if(rdf_parser->factory->need_base_uri && !uri) {
671     raptor_parser_error(rdf_parser, "Missing base URI for %s parser.",
672                         rdf_parser->factory->name);
673     return -1;
674   }
675 
676   if(uri)
677     uri=raptor_uri_copy_v2(rdf_parser->world, uri);
678 
679   if(rdf_parser->base_uri)
680     raptor_free_uri_v2(rdf_parser->world, rdf_parser->base_uri);
681   rdf_parser->base_uri=uri;
682 
683   rdf_parser->locator.uri    = uri;
684   rdf_parser->locator.line   = -1;
685   rdf_parser->locator.column = -1;
686   rdf_parser->locator.byte   = -1;
687 
688   if(rdf_parser->factory->start)
689     return rdf_parser->factory->start(rdf_parser);
690   else
691     return 0;
692 }
693 
694 
695 
696 
697 /**
698  * raptor_parse_chunk:
699  * @rdf_parser: RDF parser
700  * @buffer: content to parse
701  * @len: length of buffer
702  * @is_end: non-0 if this is the end of the content (such as EOF)
703  *
704  * Parse a block of content into triples.
705  *
706  * This method can only be called after raptor_start_parse has
707  * initialised the parser.
708  *
709  * Return value: non-0 on failure.
710  **/
711 int
raptor_parse_chunk(raptor_parser * rdf_parser,const unsigned char * buffer,size_t len,int is_end)712 raptor_parse_chunk(raptor_parser* rdf_parser,
713                    const unsigned char *buffer, size_t len, int is_end)
714 {
715   if(rdf_parser->sb)
716     raptor_stringbuffer_append_counted_string(rdf_parser->sb, buffer, len, 1);
717 
718   return rdf_parser->factory->chunk(rdf_parser, buffer, len, is_end);
719 }
720 
721 
722 /**
723  * raptor_free_parser:
724  * @parser: #raptor_parser object
725  *
726  * Destructor - destroy a raptor_parser object.
727  *
728  **/
729 void
raptor_free_parser(raptor_parser * rdf_parser)730 raptor_free_parser(raptor_parser* rdf_parser)
731 {
732   RAPTOR_ASSERT_OBJECT_POINTER_RETURN(rdf_parser, raptor_parser);
733 
734   if(rdf_parser->factory)
735     rdf_parser->factory->terminate(rdf_parser);
736 
737   if(rdf_parser->www)
738     raptor_www_free(rdf_parser->www);
739 
740   if(rdf_parser->context)
741     RAPTOR_FREE(raptor_parser_context, rdf_parser->context);
742 
743   if(rdf_parser->base_uri)
744     raptor_free_uri_v2(rdf_parser->world, rdf_parser->base_uri);
745 
746   if(rdf_parser->default_generate_id_handler_prefix)
747     RAPTOR_FREE(cstring, rdf_parser->default_generate_id_handler_prefix);
748 
749   if(rdf_parser->sb)
750     raptor_free_stringbuffer(rdf_parser->sb);
751 
752   if(rdf_parser->cache_control)
753     RAPTOR_FREE(cstring, rdf_parser->cache_control);
754 
755   if(rdf_parser->user_agent)
756     RAPTOR_FREE(cstring, rdf_parser->user_agent);
757 
758   RAPTOR_FREE(raptor_parser, rdf_parser);
759 }
760 
761 
762 /* Size of XML buffer to use when reading from a file */
763 #define RAPTOR_READ_BUFFER_SIZE 4096
764 
765 
766 /**
767  * raptor_parse_file_stream:
768  * @rdf_parser: parser
769  * @stream: FILE* of RDF content
770  * @filename: filename of content or NULL if it has no name
771  * @base_uri: the base URI to use
772  *
773  * Parse RDF content from a FILE*.
774  *
775  * After draining the stream, fclose is not called on it internally.
776  *
777  * Return value: non 0 on failure
778  **/
779 int
raptor_parse_file_stream(raptor_parser * rdf_parser,FILE * stream,const char * filename,raptor_uri * base_uri)780 raptor_parse_file_stream(raptor_parser* rdf_parser,
781                          FILE *stream, const char* filename,
782                          raptor_uri *base_uri)
783 {
784   /* Read buffer */
785   unsigned char buffer[RAPTOR_READ_BUFFER_SIZE+1];
786   int rc=0;
787   raptor_locator *locator=&rdf_parser->locator;
788 
789   if(!stream || !base_uri)
790     return 1;
791 
792   locator->line= locator->column = -1;
793   locator->file= filename;
794 
795   if(raptor_start_parse(rdf_parser, base_uri))
796     return 1;
797 
798   while(!feof(stream)) {
799     int len=fread(buffer, 1, RAPTOR_READ_BUFFER_SIZE, stream);
800     int is_end=(len < RAPTOR_READ_BUFFER_SIZE);
801     buffer[len] = '\0';
802     rc=raptor_parse_chunk(rdf_parser, buffer, len, is_end);
803     if(rc || is_end)
804       break;
805   }
806 
807   return (rc != 0);
808 }
809 
810 
811 /**
812  * raptor_parse_file:
813  * @rdf_parser: parser
814  * @uri: URI of RDF content or NULL to read from standard input
815  * @base_uri: the base URI to use (or NULL if the same)
816  *
817  * Parse RDF content at a file URI.
818  *
819  * If uri is NULL (source is stdin), then the base_uri is required.
820  *
821  * Return value: non 0 on failure
822  **/
823 int
raptor_parse_file(raptor_parser * rdf_parser,raptor_uri * uri,raptor_uri * base_uri)824 raptor_parse_file(raptor_parser* rdf_parser, raptor_uri *uri,
825                   raptor_uri *base_uri)
826 {
827   int rc=0;
828   int free_base_uri=0;
829   const char *filename=NULL;
830   FILE *fh=NULL;
831 #if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H)
832   struct stat buf;
833 #endif
834 
835   if(uri) {
836     filename=raptor_uri_uri_string_to_filename(raptor_uri_as_string_v2(rdf_parser->world, uri));
837     if(!filename)
838       return 1;
839 
840 #if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H)
841     if(!stat(filename, &buf) && S_ISDIR(buf.st_mode)) {
842       raptor_parser_error(rdf_parser, "Cannot read from a directory '%s'",
843                           filename);
844       goto cleanup;
845     }
846 #endif
847 
848     fh = fopen(filename, "r");
849     if(!fh) {
850       raptor_parser_error(rdf_parser, "file '%s' open failed - %s",
851                           filename, strerror(errno));
852       goto cleanup;
853     }
854     if(!base_uri) {
855       base_uri=raptor_uri_copy_v2(rdf_parser->world, uri);
856       free_base_uri=1;
857     }
858   } else {
859     if(!base_uri)
860       return 1;
861     fh=stdin;
862   }
863 
864   rc=raptor_parse_file_stream(rdf_parser, fh, filename, base_uri);
865 
866   cleanup:
867   if(uri) {
868     if(fh)
869       fclose(fh);
870     RAPTOR_FREE(cstring, (void*)filename);
871   }
872   if(free_base_uri)
873     raptor_free_uri_v2(rdf_parser->world, base_uri);
874 
875   return rc;
876 }
877 
878 
879 void
raptor_parse_uri_write_bytes(raptor_www * www,void * userdata,const void * ptr,size_t size,size_t nmemb)880 raptor_parse_uri_write_bytes(raptor_www* www,
881                              void *userdata, const void *ptr,
882                              size_t size, size_t nmemb)
883 {
884   raptor_parse_bytes_context* rpbc=(raptor_parse_bytes_context*)userdata;
885   int len=size*nmemb;
886 
887   if(!rpbc->started) {
888     raptor_uri* base_uri=rpbc->base_uri;
889 
890     if(!base_uri) {
891       rpbc->final_uri=raptor_www_get_final_uri(www);
892       /* base URI after URI resolution is finally chosen */
893       base_uri = rpbc->final_uri ? rpbc->final_uri : www->uri;
894     }
895 
896     if(raptor_start_parse(rpbc->rdf_parser, base_uri))
897       raptor_www_abort(www, "Parsing failed");
898     rpbc->started=1;
899   }
900 
901   if(raptor_parse_chunk(rpbc->rdf_parser, (unsigned char*)ptr, len, 0))
902     raptor_www_abort(www, "Parsing failed");
903 }
904 
905 
906 static void
raptor_parse_uri_content_type_handler(raptor_www * www,void * userdata,const char * content_type)907 raptor_parse_uri_content_type_handler(raptor_www* www, void* userdata,
908                                       const char* content_type)
909 {
910   raptor_parser* rdf_parser=(raptor_parser*)userdata;
911   if(rdf_parser->factory->content_type_handler)
912     rdf_parser->factory->content_type_handler(rdf_parser, content_type);
913 }
914 
915 
916 int
raptor_parse_uri_no_net_filter(void * user_data,raptor_uri * uri)917 raptor_parse_uri_no_net_filter(void *user_data, raptor_uri* uri)
918 {
919   raptor_parser* rdf_parser=(raptor_parser*)user_data;
920   unsigned char* uri_string=raptor_uri_as_string_v2(rdf_parser->world, uri);
921 
922   if(raptor_uri_uri_string_is_file_uri(uri_string))
923     return 0;
924 
925   raptor_parser_error((raptor_parser*)user_data,
926                       "Network fetch of URI '%s' denied", uri_string);
927   return 1;
928 }
929 
930 
931 /**
932  * raptor_parse_uri:
933  * @rdf_parser: parser
934  * @uri: URI of RDF content
935  * @base_uri: the base URI to use (or NULL if the same)
936  *
937  * Parse the RDF content at URI.
938  *
939  * Sends an HTTP Accept: header whent the URI is of the HTTP protocol,
940  * see raptor_parse_uri_with_connection() for details including
941  * how the @base_uri is used.
942  *
943  * Return value: non 0 on failure
944  **/
945 int
raptor_parse_uri(raptor_parser * rdf_parser,raptor_uri * uri,raptor_uri * base_uri)946 raptor_parse_uri(raptor_parser* rdf_parser, raptor_uri *uri,
947                  raptor_uri *base_uri)
948 {
949   return raptor_parse_uri_with_connection(rdf_parser, uri, base_uri, NULL);
950 }
951 
952 
953 /**
954  * raptor_parse_uri_with_connection:
955  * @rdf_parser: parser
956  * @uri: URI of RDF content
957  * @base_uri: the base URI to use (or NULL if the same)
958  * @connection: connection object pointer or NULL to create a new one
959  *
960  * Parse RDF content at URI using existing WWW connection.
961  *
962  * If @base_uri is not given and during resolution of the URI, a
963  * protocol redirection occurs, the final resolved URI will be
964  * used as the base URI.  If redirection does not occur, the
965  * base URI will be @uri.
966  *
967  * If @base_uri is given, it overrides the process above.
968  *
969  * When @connection is NULL and a MIME Type exists for the parser
970  * type - such as returned by raptor_get_mime_type(parser) - this
971  * type is sent in an HTTP Accept: header in the form
972  * Accept: MIME-TYPE along with a wildcard of 0.1 quality, so MIME-TYPE is
973  * preferred rather than the sole answer.  The latter part may not be
974  * necessary but should ensure an HTTP 200 response.
975  *
976  * Return value: non 0 on failure
977  **/
978 int
raptor_parse_uri_with_connection(raptor_parser * rdf_parser,raptor_uri * uri,raptor_uri * base_uri,void * connection)979 raptor_parse_uri_with_connection(raptor_parser* rdf_parser, raptor_uri *uri,
980                                  raptor_uri *base_uri, void *connection)
981 {
982   int ret=0;
983   raptor_parse_bytes_context rpbc;
984 
985   if(connection) {
986     if(rdf_parser->www)
987       raptor_www_free(rdf_parser->www);
988     rdf_parser->www=raptor_www_new_with_connection_v2(rdf_parser->world, connection);
989     if(!rdf_parser->www)
990       return 1;
991   } else {
992     const char *accept_h;
993 
994     if(rdf_parser->www)
995       raptor_www_free(rdf_parser->www);
996     rdf_parser->www=raptor_www_new_v2(rdf_parser->world);
997     if(!rdf_parser->www)
998       return 1;
999 
1000     accept_h=raptor_parser_get_accept_header(rdf_parser);
1001     if(accept_h) {
1002       raptor_www_set_http_accept(rdf_parser->www, accept_h);
1003       RAPTOR_FREE(cstring, accept_h);
1004     }
1005   }
1006 
1007   rpbc.rdf_parser=rdf_parser;
1008   rpbc.base_uri=base_uri;
1009   rpbc.final_uri=NULL;
1010   rpbc.started=0;
1011 
1012   if(rdf_parser->uri_filter)
1013     raptor_www_set_uri_filter(rdf_parser->www, rdf_parser->uri_filter,
1014                               rdf_parser->uri_filter_user_data);
1015   else if(rdf_parser->features[RAPTOR_FEATURE_NO_NET])
1016     raptor_www_set_uri_filter(rdf_parser->www, raptor_parse_uri_no_net_filter, rdf_parser);
1017 
1018   raptor_www_set_error_handler(rdf_parser->www,
1019                                rdf_parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].handler,
1020                                rdf_parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].user_data);
1021   raptor_www_set_write_bytes_handler(rdf_parser->www, raptor_parse_uri_write_bytes,
1022                                      &rpbc);
1023 
1024   raptor_www_set_content_type_handler(rdf_parser->www,
1025                                       raptor_parse_uri_content_type_handler,
1026                                       rdf_parser);
1027 
1028   raptor_www_set_http_cache_control(rdf_parser->www, rdf_parser->cache_control);
1029 
1030   if(rdf_parser->user_agent)
1031     raptor_www_set_user_agent(rdf_parser->www, rdf_parser->user_agent);
1032 
1033   ret=raptor_www_fetch(rdf_parser->www, uri);
1034 
1035   if(!rpbc.started && !ret)
1036     ret=raptor_start_parse(rdf_parser, base_uri);
1037 
1038   if(rpbc.final_uri)
1039     raptor_free_uri_v2(rdf_parser->world, rpbc.final_uri);
1040 
1041   if(ret) {
1042     raptor_www_free(rdf_parser->www);
1043     rdf_parser->www=NULL;
1044     return 1;
1045   }
1046 
1047   if(raptor_parse_chunk(rdf_parser, NULL, 0, 1))
1048     rdf_parser->failed=1;
1049 
1050   raptor_www_free(rdf_parser->www);
1051   rdf_parser->www=NULL;
1052 
1053   return rdf_parser->failed;
1054 }
1055 
1056 
1057 /*
1058  * raptor_parser_fatal_error - Fatal Error from a parser - Internal
1059  */
1060 void
raptor_parser_fatal_error(raptor_parser * parser,const char * message,...)1061 raptor_parser_fatal_error(raptor_parser* parser, const char *message, ...)
1062 {
1063   va_list arguments;
1064 
1065   parser->failed=1;
1066 
1067   va_start(arguments, message);
1068   if(parser)
1069     raptor_log_error_varargs(parser->world,
1070                              RAPTOR_LOG_LEVEL_FATAL,
1071                              parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_FATAL].handler,
1072                              parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_FATAL].user_data,
1073                              &parser->locator,
1074                              message, arguments);
1075   else
1076     raptor_log_error_varargs(NULL,
1077                              RAPTOR_LOG_LEVEL_FATAL, NULL, NULL, NULL,
1078                              message, arguments);
1079   va_end(arguments);
1080 }
1081 
1082 
1083 /*
1084  * raptor_parser_error - Error from a parser - Internal
1085  */
1086 void
raptor_parser_error(raptor_parser * parser,const char * message,...)1087 raptor_parser_error(raptor_parser* parser, const char *message, ...)
1088 {
1089   va_list arguments;
1090 
1091   va_start(arguments, message);
1092 
1093   raptor_parser_error_varargs(parser, message, arguments);
1094 
1095   va_end(arguments);
1096 }
1097 
1098 
1099 /*
1100  * raptor_parser_simple_error - Error from a parser - Internal
1101  *
1102  * Matches the raptor_simple_message_handler API but same as
1103  * raptor_parser_error
1104  */
1105 void
raptor_parser_simple_error(void * user_data,const char * message,...)1106 raptor_parser_simple_error(void* user_data, const char *message, ...)
1107 {
1108   raptor_parser* parser=(raptor_parser*)user_data;
1109   va_list arguments;
1110 
1111   va_start(arguments, message);
1112 
1113   if(parser)
1114     raptor_log_error_varargs(parser->world,
1115                              RAPTOR_LOG_LEVEL_ERROR,
1116                              parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].handler,
1117                              parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].user_data,
1118                              &parser->locator,
1119                              message, arguments);
1120   else
1121     raptor_log_error_varargs(NULL,
1122                              RAPTOR_LOG_LEVEL_ERROR,
1123                              NULL, NULL, NULL,
1124                              message, arguments);
1125 
1126   va_end(arguments);
1127 }
1128 
1129 
1130 /**
1131  * raptor_parser_error_varargs:
1132  * @parser: parser
1133  * @message: error format message
1134  * @arguments: varargs for message
1135  *
1136  * Error from a parser - Internal.
1137  */
1138 void
raptor_parser_error_varargs(raptor_parser * parser,const char * message,va_list arguments)1139 raptor_parser_error_varargs(raptor_parser* parser, const char *message,
1140                             va_list arguments)
1141 {
1142   if(parser)
1143     raptor_log_error_varargs(parser->world,
1144                              RAPTOR_LOG_LEVEL_ERROR,
1145                              parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].handler,
1146                              parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].user_data,
1147                              &parser->locator,
1148                              message, arguments);
1149   else
1150     raptor_log_error_varargs(NULL,
1151                              RAPTOR_LOG_LEVEL_ERROR,
1152                              NULL, NULL, NULL,
1153                              message, arguments);
1154 }
1155 
1156 
1157 /*
1158  * raptor_parser_warning - Warning from a parser - Internal
1159  */
1160 void
raptor_parser_warning(raptor_parser * parser,const char * message,...)1161 raptor_parser_warning(raptor_parser* parser, const char *message, ...)
1162 {
1163   va_list arguments;
1164 
1165   va_start(arguments, message);
1166 
1167   if(parser)
1168     raptor_log_error_varargs(parser->world,
1169                              RAPTOR_LOG_LEVEL_WARNING,
1170                              parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_WARNING].handler,
1171                              parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_WARNING].user_data,
1172                              &parser->locator,
1173                              message, arguments);
1174   else
1175     raptor_log_error_varargs(NULL,
1176                              RAPTOR_LOG_LEVEL_WARNING,
1177                              NULL, NULL, NULL,
1178                              message, arguments);
1179 
1180   va_end(arguments);
1181 }
1182 
1183 
1184 
1185 /* PUBLIC FUNCTIONS */
1186 
1187 /**
1188  * raptor_set_fatal_error_handler:
1189  * @parser: the parser
1190  * @user_data: user data to pass to function
1191  * @handler: pointer to the function
1192  *
1193  * Set the parser error handling function.
1194  *
1195  * The function will receive callbacks when the parser fails.
1196  *
1197  **/
1198 void
raptor_set_fatal_error_handler(raptor_parser * parser,void * user_data,raptor_message_handler handler)1199 raptor_set_fatal_error_handler(raptor_parser* parser, void *user_data,
1200                                raptor_message_handler handler)
1201 {
1202   parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_FATAL].user_data=user_data;
1203   parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_FATAL].handler=handler;
1204 }
1205 
1206 
1207 /**
1208  * raptor_set_error_handler:
1209  * @parser: the parser
1210  * @user_data: user data to pass to function
1211  * @handler: pointer to the function
1212  *
1213  * Set the parser error handling function.
1214  *
1215  * The function will receive callbacks when the parser fails.
1216  *
1217  **/
1218 void
raptor_set_error_handler(raptor_parser * parser,void * user_data,raptor_message_handler handler)1219 raptor_set_error_handler(raptor_parser* parser, void *user_data,
1220                          raptor_message_handler handler)
1221 {
1222   parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].user_data=user_data;
1223   parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].handler=handler;
1224 }
1225 
1226 
1227 /**
1228  * raptor_set_warning_handler:
1229  * @parser: the parser
1230  * @user_data: user data to pass to function
1231  * @handler: pointer to the function
1232  *
1233  * Set the parser warning handling function.
1234  *
1235  * The function will receive callbacks when the parser gives a warning.
1236  *
1237  **/
1238 void
raptor_set_warning_handler(raptor_parser * parser,void * user_data,raptor_message_handler handler)1239 raptor_set_warning_handler(raptor_parser* parser, void *user_data,
1240                            raptor_message_handler handler)
1241 {
1242   parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_WARNING].user_data=user_data;
1243   parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_WARNING].handler=handler;
1244 }
1245 
1246 
1247 /**
1248  * raptor_set_statement_handler:
1249  * @parser: #raptor_parser parser object
1250  * @user_data: user data pointer for callback
1251  * @handler: new statement callback function
1252  *
1253  * Set the statement handler function for the parser.
1254  *
1255  **/
1256 void
raptor_set_statement_handler(raptor_parser * parser,void * user_data,raptor_statement_handler handler)1257 raptor_set_statement_handler(raptor_parser* parser,
1258                              void *user_data,
1259                              raptor_statement_handler handler)
1260 {
1261   parser->user_data=user_data;
1262   parser->statement_handler=handler;
1263 }
1264 
1265 
1266 /**
1267  * raptor_set_graph_handler:
1268  * @parser: #raptor_parser parser object
1269  * @user_data: user data pointer for callback
1270  * @handler: new graph callback function
1271  *
1272  * Set the graph handler function for the parser.
1273  *
1274  **/
1275 void
raptor_set_graph_handler(raptor_parser * parser,void * user_data,raptor_graph_handler handler)1276 raptor_set_graph_handler(raptor_parser* parser,
1277 			 void *user_data,
1278 			 raptor_graph_handler handler)
1279 {
1280   parser->user_data=user_data;
1281   parser->graph_handler=handler;
1282 }
1283 
1284 
1285 /**
1286  * raptor_set_generate_id_handler:
1287  * @parser: #raptor_parser parser object
1288  * @user_data: user data pointer for callback
1289  * @handler: generate ID callback function
1290  *
1291  * Set the generate ID handler function for the parser.
1292  *
1293  * Sets the function to generate IDs for the parser.  The handler is
1294  * called with the @user_data parameter and an ID type of either
1295  * RAPTOR_GENID_TYPE_BNODEID or RAPTOR_GENID_TYPE_BAGID (latter is deprecated).
1296  *
1297  * The final argument of the callback method is user_bnodeid, the value of
1298  * the rdf:nodeID attribute that the user provided if any (or NULL).
1299  * It can either be returned directly as the generated value when present or
1300  * modified.  The passed in value must be free()d if it is not used.
1301  *
1302  * If handler is NULL, the default method is used
1303  *
1304  **/
1305 void
raptor_set_generate_id_handler(raptor_parser * parser,void * user_data,raptor_generate_id_handler handler)1306 raptor_set_generate_id_handler(raptor_parser* parser,
1307                                void *user_data,
1308                                raptor_generate_id_handler handler)
1309 {
1310   parser->generate_id_handler_user_data=user_data;
1311   parser->generate_id_handler=handler;
1312 }
1313 
1314 
1315 /**
1316  * raptor_set_namespace_handler:
1317  * @parser: #raptor_parser parser object
1318  * @user_data: user data pointer for callback
1319  * @handler: new namespace callback function
1320  *
1321  * Set the namespace handler function for the parser.
1322  *
1323  * When a prefix/namespace is seen in a parser, call the given
1324  * @handler with the prefix string and the #raptor_uri namespace URI.
1325  * Either can be NULL for the default prefix or default namespace.
1326  *
1327  * The handler function does not deal with duplicates so any
1328  * namespace may be declared multiple times.
1329  *
1330  **/
1331 void
raptor_set_namespace_handler(raptor_parser * parser,void * user_data,raptor_namespace_handler handler)1332 raptor_set_namespace_handler(raptor_parser* parser,
1333                              void *user_data,
1334                              raptor_namespace_handler handler)
1335 {
1336   parser->namespace_handler=handler;
1337   parser->namespace_handler_user_data=user_data;
1338 }
1339 
1340 
1341 /**
1342  * raptor_parser_set_uri_filter:
1343  * @parser: parser object
1344  * @filter: URI filter function
1345  * @user_data: User data to pass to filter function
1346  *
1347  * Set URI filter function for WWW retrieval.
1348  **/
1349 void
raptor_parser_set_uri_filter(raptor_parser * parser,raptor_uri_filter_func filter,void * user_data)1350 raptor_parser_set_uri_filter(raptor_parser* parser,
1351                              raptor_uri_filter_func filter,
1352                              void *user_data)
1353 {
1354   parser->uri_filter=filter;
1355   parser->uri_filter_user_data=user_data;
1356 }
1357 
1358 
1359 #ifndef RAPTOR_DISABLE_V1
1360 /**
1361  * raptor_features_enumerate:
1362  * @feature: feature enumeration (0+)
1363  * @name: pointer to store feature short name (or NULL)
1364  * @uri: pointer to store feature URI (or NULL)
1365  * @label: pointer to feature label (or NULL)
1366  *
1367  * Get list of syntax features.
1368  *
1369  * If uri is not NULL, a pointer to a new raptor_uri is returned
1370  * that must be freed by the caller with raptor_free_uri().
1371  *
1372  * raptor_init() MUST have been called before calling this function.
1373  * Use raptor_features_enumerate_v2() if using raptor_world APIs.
1374  *
1375  * Return value: 0 on success, <0 on failure, >0 if feature is unknown
1376  **/
1377 int
raptor_features_enumerate(const raptor_feature feature,const char ** name,raptor_uri ** uri,const char ** label)1378 raptor_features_enumerate(const raptor_feature feature,
1379                           const char **name,
1380                           raptor_uri **uri, const char **label)
1381 {
1382   return raptor_features_enumerate_v2(raptor_world_instance(),
1383                                       feature, name, uri, label);
1384 }
1385 #endif
1386 
1387 
1388 /**
1389  * raptor_features_enumerate_v2:
1390  * @world: raptor_world object
1391  * @feature: feature enumeration (0+)
1392  * @name: pointer to store feature short name (or NULL)
1393  * @uri: pointer to store feature URI (or NULL)
1394  * @label: pointer to feature label (or NULL)
1395  *
1396  * Get list of syntax features.
1397  *
1398  * If uri is not NULL, a pointer to a new raptor_uri is returned
1399  * that must be freed by the caller with raptor_free_uri_v2().
1400  *
1401  * Return value: 0 on success, <0 on failure, >0 if feature is unknown
1402  **/
1403 int
raptor_features_enumerate_v2(raptor_world * world,const raptor_feature feature,const char ** name,raptor_uri ** uri,const char ** label)1404 raptor_features_enumerate_v2(raptor_world* world,
1405                              const raptor_feature feature,
1406                              const char **name,
1407                              raptor_uri **uri, const char **label)
1408 {
1409   return raptor_features_enumerate_common(world, feature, name, uri, label, 1);
1410 }
1411 
1412 
1413 /**
1414  * raptor_set_feature:
1415  * @parser: #raptor_parser parser object
1416  * @feature: feature to set from enumerated #raptor_feature values
1417  * @value: integer feature value (0 or larger)
1418  *
1419  * Set various parser features.
1420  *
1421  * The allowed features are available via raptor_features_enumerate().
1422  *
1423  * Return value: non 0 on failure or if the feature is unknown
1424  **/
1425 int
raptor_set_feature(raptor_parser * parser,raptor_feature feature,int value)1426 raptor_set_feature(raptor_parser *parser, raptor_feature feature, int value)
1427 {
1428   if(value < 0)
1429     return -1;
1430 
1431   switch(feature) {
1432     case RAPTOR_FEATURE_SCANNING:
1433     case RAPTOR_FEATURE_ALLOW_NON_NS_ATTRIBUTES:
1434     case RAPTOR_FEATURE_ALLOW_OTHER_PARSETYPES:
1435     case RAPTOR_FEATURE_ALLOW_BAGID:
1436     case RAPTOR_FEATURE_ALLOW_RDF_TYPE_RDF_LIST:
1437     case RAPTOR_FEATURE_NORMALIZE_LANGUAGE:
1438     case RAPTOR_FEATURE_NON_NFC_FATAL:
1439     case RAPTOR_FEATURE_WARN_OTHER_PARSETYPES:
1440     case RAPTOR_FEATURE_CHECK_RDF_ID:
1441     case RAPTOR_FEATURE_NO_NET:
1442     case RAPTOR_FEATURE_HTML_TAG_SOUP:
1443     case RAPTOR_FEATURE_MICROFORMATS:
1444     case RAPTOR_FEATURE_HTML_LINK:
1445     case RAPTOR_FEATURE_WWW_TIMEOUT:
1446     case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES:
1447       parser->features[(int)feature]=value;
1448       break;
1449 
1450     case RAPTOR_FEATURE_ASSUME_IS_RDF:
1451       break;
1452 
1453 
1454     case RAPTOR_FEATURE_WRITE_BASE_URI:
1455     case RAPTOR_FEATURE_RELATIVE_URIS:
1456     case RAPTOR_FEATURE_START_URI:
1457     case RAPTOR_FEATURE_WRITER_AUTO_INDENT:
1458     case RAPTOR_FEATURE_WRITER_AUTO_EMPTY:
1459     case RAPTOR_FEATURE_WRITER_INDENT_WIDTH:
1460     case RAPTOR_FEATURE_WRITER_XML_VERSION:
1461     case RAPTOR_FEATURE_WRITER_XML_DECLARATION:
1462 
1463     case RAPTOR_FEATURE_RESOURCE_BORDER:
1464     case RAPTOR_FEATURE_LITERAL_BORDER:
1465     case RAPTOR_FEATURE_BNODE_BORDER:
1466     case RAPTOR_FEATURE_RESOURCE_FILL:
1467     case RAPTOR_FEATURE_LITERAL_FILL:
1468     case RAPTOR_FEATURE_BNODE_FILL:
1469 
1470     case RAPTOR_FEATURE_JSON_CALLBACK:
1471     case RAPTOR_FEATURE_JSON_EXTRA_DATA:
1472     case RAPTOR_FEATURE_RSS_TRIPLES:
1473     case RAPTOR_FEATURE_ATOM_ENTRY_URI:
1474     case RAPTOR_FEATURE_PREFIX_ELEMENTS:
1475 
1476     case RAPTOR_FEATURE_WWW_HTTP_CACHE_CONTROL:
1477     case RAPTOR_FEATURE_WWW_HTTP_USER_AGENT:
1478     default:
1479       return -1;
1480       break;
1481   }
1482 
1483   return 0;
1484 }
1485 
1486 
1487 /**
1488  * raptor_parser_set_feature_string:
1489  * @parser: #raptor_parser parser object
1490  * @feature: feature to set from enumerated #raptor_feature values
1491  * @value: feature value
1492  *
1493  * Set parser features with string values.
1494  *
1495  * The allowed features are available via raptor_features_enumerate().
1496  * If the feature type is integer, the value is interpreted as an integer.
1497  *
1498  * Return value: non 0 on failure or if the feature is unknown
1499  **/
1500 int
raptor_parser_set_feature_string(raptor_parser * parser,raptor_feature feature,const unsigned char * value)1501 raptor_parser_set_feature_string(raptor_parser *parser,
1502                                  raptor_feature feature,
1503                                  const unsigned char *value)
1504 {
1505   int value_is_string=(raptor_feature_value_type(feature) == 1);
1506   if(!value_is_string)
1507     return raptor_set_feature(parser, feature, atoi((const char*)value));
1508 
1509   if((feature == RAPTOR_FEATURE_WWW_HTTP_CACHE_CONTROL) ||
1510      (feature == RAPTOR_FEATURE_WWW_HTTP_USER_AGENT)) {
1511     char *value_copy;
1512     size_t len=0;
1513     if(value)
1514       len=strlen((const char*)value);
1515     value_copy=(char*)RAPTOR_MALLOC(cstring, len+1);
1516     if(!value_copy)
1517       return 1;
1518 
1519     if(len)
1520       strncpy(value_copy, (const char*)value, len);
1521     value_copy[len]='\0';
1522 
1523     if(feature == RAPTOR_FEATURE_WWW_HTTP_CACHE_CONTROL)
1524       parser->cache_control=value_copy;
1525     else
1526       parser->user_agent=value_copy;
1527 
1528     return 0;
1529   }
1530 
1531   return -1;
1532 }
1533 
1534 
1535 /**
1536  * raptor_get_feature:
1537  * @parser: #raptor_parser parser object
1538  * @feature: feature to get value
1539  *
1540  * Get various parser features.
1541  *
1542  * The allowed features are available via raptor_features_enumerate().
1543  *
1544  * Note: no feature value is negative
1545  *
1546  * Return value: feature value or < 0 for an illegal feature
1547  **/
1548 int
raptor_get_feature(raptor_parser * parser,raptor_feature feature)1549 raptor_get_feature(raptor_parser *parser, raptor_feature feature)
1550 {
1551   int result= -1;
1552 
1553   switch(feature) {
1554     case RAPTOR_FEATURE_SCANNING:
1555     case RAPTOR_FEATURE_ALLOW_NON_NS_ATTRIBUTES:
1556     case RAPTOR_FEATURE_ALLOW_OTHER_PARSETYPES:
1557     case RAPTOR_FEATURE_ALLOW_BAGID:
1558     case RAPTOR_FEATURE_ALLOW_RDF_TYPE_RDF_LIST:
1559     case RAPTOR_FEATURE_NORMALIZE_LANGUAGE:
1560     case RAPTOR_FEATURE_NON_NFC_FATAL:
1561     case RAPTOR_FEATURE_WARN_OTHER_PARSETYPES:
1562     case RAPTOR_FEATURE_CHECK_RDF_ID:
1563     case RAPTOR_FEATURE_NO_NET:
1564     case RAPTOR_FEATURE_HTML_TAG_SOUP:
1565     case RAPTOR_FEATURE_MICROFORMATS:
1566     case RAPTOR_FEATURE_HTML_LINK:
1567     case RAPTOR_FEATURE_WWW_TIMEOUT:
1568     case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES:
1569       result = parser->features[(int)feature];
1570       break;
1571 
1572     case RAPTOR_FEATURE_ASSUME_IS_RDF:
1573       result=0;
1574       break;
1575 
1576     /* serializing features */
1577     case RAPTOR_FEATURE_WRITE_BASE_URI:
1578     case RAPTOR_FEATURE_RELATIVE_URIS:
1579     case RAPTOR_FEATURE_START_URI:
1580     case RAPTOR_FEATURE_RESOURCE_BORDER:
1581     case RAPTOR_FEATURE_LITERAL_BORDER:
1582     case RAPTOR_FEATURE_BNODE_BORDER:
1583     case RAPTOR_FEATURE_RESOURCE_FILL:
1584     case RAPTOR_FEATURE_LITERAL_FILL:
1585     case RAPTOR_FEATURE_BNODE_FILL:
1586     case RAPTOR_FEATURE_JSON_CALLBACK:
1587     case RAPTOR_FEATURE_JSON_EXTRA_DATA:
1588     case RAPTOR_FEATURE_RSS_TRIPLES:
1589     case RAPTOR_FEATURE_ATOM_ENTRY_URI:
1590     case RAPTOR_FEATURE_PREFIX_ELEMENTS:
1591 
1592     /* XML writer features */
1593     case RAPTOR_FEATURE_WRITER_AUTO_INDENT:
1594     case RAPTOR_FEATURE_WRITER_AUTO_EMPTY:
1595     case RAPTOR_FEATURE_WRITER_INDENT_WIDTH:
1596     case RAPTOR_FEATURE_WRITER_XML_VERSION:
1597     case RAPTOR_FEATURE_WRITER_XML_DECLARATION:
1598 
1599     /* WWW features */
1600     case RAPTOR_FEATURE_WWW_HTTP_CACHE_CONTROL:
1601     case RAPTOR_FEATURE_WWW_HTTP_USER_AGENT:
1602 
1603     default:
1604       break;
1605   }
1606 
1607   return result;
1608 }
1609 
1610 
1611 /**
1612  * raptor_parser_get_feature_string:
1613  * @parser: #raptor_parser parser object
1614  * @feature: feature to get value
1615  *
1616  * Get parser features with string values.
1617  *
1618  * The allowed features are available via raptor_features_enumerate().
1619  * If a string is returned, it must be freed by the caller.
1620  *
1621  * Return value: feature value or NULL for an illegal feature or no value
1622  **/
1623 const unsigned char *
raptor_parser_get_feature_string(raptor_parser * parser,raptor_feature feature)1624 raptor_parser_get_feature_string(raptor_parser *parser,
1625                                  raptor_feature feature)
1626 {
1627   int value_is_string=(raptor_feature_value_type(feature) == 1);
1628   if(!value_is_string)
1629     return NULL;
1630 
1631   return NULL;
1632 }
1633 
1634 
1635 /**
1636  * raptor_set_parser_strict:
1637  * @rdf_parser: #raptor_parser object
1638  * @is_strict: Non 0 for strict parsing
1639  *
1640  * Set parser to strict / lax mode.
1641  *
1642  **/
1643 void
raptor_set_parser_strict(raptor_parser * rdf_parser,int is_strict)1644 raptor_set_parser_strict(raptor_parser* rdf_parser, int is_strict)
1645 {
1646   is_strict=(is_strict) ? 1 : 0;
1647 
1648   /* Initialise default parser mode */
1649   rdf_parser->features[RAPTOR_FEATURE_SCANNING]=0;
1650 
1651   rdf_parser->features[RAPTOR_FEATURE_ALLOW_NON_NS_ATTRIBUTES]=!is_strict;
1652   rdf_parser->features[RAPTOR_FEATURE_ALLOW_OTHER_PARSETYPES]=!is_strict;
1653   rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID]=!is_strict;
1654   rdf_parser->features[RAPTOR_FEATURE_ALLOW_RDF_TYPE_RDF_LIST]=0;
1655   rdf_parser->features[RAPTOR_FEATURE_NORMALIZE_LANGUAGE]=1;
1656   rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL]=is_strict;
1657   rdf_parser->features[RAPTOR_FEATURE_WARN_OTHER_PARSETYPES]=!is_strict;
1658   rdf_parser->features[RAPTOR_FEATURE_CHECK_RDF_ID]=1;
1659   rdf_parser->features[RAPTOR_FEATURE_HTML_TAG_SOUP]=!is_strict;
1660   rdf_parser->features[RAPTOR_FEATURE_MICROFORMATS]=!is_strict;
1661   rdf_parser->features[RAPTOR_FEATURE_HTML_LINK]=!is_strict;
1662 }
1663 
1664 
1665 /**
1666  * raptor_set_default_generate_id_parameters:
1667  * @rdf_parser: #raptor_parser object
1668  * @prefix: prefix string
1669  * @base: integer base identifier
1670  *
1671  * Set default ID generation parameters.
1672  *
1673  * Sets the parameters for the default algorithm used to generate IDs.
1674  * The default algorithm uses both @prefix and @base to generate a new
1675  * identifier.   The exact identifier generated is not guaranteed to
1676  * be a strict concatenation of @prefix and @base but will use both
1677  * parts. The @prefix parameter is copied to generate an ID.
1678  *
1679  * For finer control of the generated identifiers, use
1680  * raptor_set_default_generate_id_handler().
1681  *
1682  * If @prefix is NULL, the default prefix is used (currently "genid")
1683  * If @base is less than 1, it is initialised to 1.
1684  *
1685  **/
1686 void
raptor_set_default_generate_id_parameters(raptor_parser * rdf_parser,char * prefix,int base)1687 raptor_set_default_generate_id_parameters(raptor_parser* rdf_parser,
1688                                           char *prefix, int base)
1689 {
1690   char *prefix_copy=NULL;
1691   size_t length=0;
1692 
1693   if(--base<0)
1694     base=0;
1695 
1696   if(prefix) {
1697     length=strlen(prefix);
1698 
1699     prefix_copy=(char*)RAPTOR_MALLOC(cstring, length+1);
1700     if(!prefix_copy)
1701       return;
1702     strcpy(prefix_copy, prefix);
1703   }
1704 
1705   if(rdf_parser->default_generate_id_handler_prefix)
1706     RAPTOR_FREE(cstring, rdf_parser->default_generate_id_handler_prefix);
1707 
1708   rdf_parser->default_generate_id_handler_prefix=prefix_copy;
1709   rdf_parser->default_generate_id_handler_prefix_length=length;
1710   rdf_parser->default_generate_id_handler_base=base;
1711 }
1712 
1713 
1714 /**
1715  * raptor_get_name:
1716  * @rdf_parser: #raptor_parser parser object
1717  *
1718  * Get the name of a parser.
1719  *
1720  * Return value: the short name for the parser.
1721  **/
1722 const char*
raptor_get_name(raptor_parser * rdf_parser)1723 raptor_get_name(raptor_parser *rdf_parser)
1724 {
1725   if(rdf_parser->factory->get_name)
1726     return rdf_parser->factory->get_name(rdf_parser);
1727   else
1728     return rdf_parser->factory->name;
1729 }
1730 
1731 
1732 /**
1733  * raptor_get_label:
1734  * @rdf_parser: #raptor_parser parser object
1735  *
1736  * Get a descriptive label of a parser.
1737  *
1738  * Return value: a readable label for the parser.
1739  **/
1740 const char*
raptor_get_label(raptor_parser * rdf_parser)1741 raptor_get_label(raptor_parser *rdf_parser)
1742 {
1743   return rdf_parser->factory->label;
1744 }
1745 
1746 
1747 /**
1748  * raptor_get_mime_type:
1749  * @rdf_parser: #raptor_parser parser object
1750  *
1751  * Return MIME type for the parser.
1752  *
1753  * Return value: MIME type or NULL if none available
1754  **/
1755 const char*
raptor_get_mime_type(raptor_parser * rdf_parser)1756 raptor_get_mime_type(raptor_parser *rdf_parser)
1757 {
1758   const char *mime_type=NULL;
1759   if(rdf_parser->factory->mime_types) {
1760     raptor_type_q* tq;
1761     tq=(raptor_type_q*)raptor_sequence_get_at(rdf_parser->factory->mime_types, 0);
1762     if(tq)
1763       mime_type=tq->mime_type;
1764   }
1765 
1766   return mime_type;
1767 }
1768 
1769 
1770 /**
1771  * raptor_get_need_base_uri:
1772  * @rdf_parser: #raptor_parser parser object
1773  *
1774  * Get a boolean whether this parser needs a base URI to start parsing.
1775  *
1776  * Return value: non-0 if this parser needs a base URI
1777  **/
1778 int
raptor_get_need_base_uri(raptor_parser * rdf_parser)1779 raptor_get_need_base_uri(raptor_parser *rdf_parser)
1780 {
1781   return rdf_parser->factory->need_base_uri;
1782 }
1783 
1784 
1785 /**
1786  * raptor_parse_abort:
1787  * @rdf_parser: #raptor_parser parser object
1788  *
1789  * Abort an ongoing parse.
1790  *
1791  * Causes any ongoing generation of statements by a parser to be
1792  * terminated and the parser to return controlto the application
1793  * as soon as draining any existing buffers.
1794  *
1795  * Most useful inside raptor_parse_file or raptor_parse_uri when
1796  * the Raptor library is directing the parsing and when one of the
1797  * callback handlers such as as set by raptor_set_statement_handler
1798  * requires to return to the main application code.
1799  **/
1800 void
raptor_parse_abort(raptor_parser * rdf_parser)1801 raptor_parse_abort(raptor_parser *rdf_parser)
1802 {
1803   rdf_parser->failed=1;
1804 }
1805 
1806 
1807 static unsigned char*
raptor_default_generate_id_handler(void * user_data,raptor_genid_type type,unsigned char * user_bnodeid)1808 raptor_default_generate_id_handler(void *user_data, raptor_genid_type type,
1809                                    unsigned char *user_bnodeid)
1810 {
1811   raptor_parser *rdf_parser=(raptor_parser *)user_data;
1812   int id;
1813   unsigned char *buffer;
1814   int length;
1815   int tmpid;
1816 
1817   if(user_bnodeid)
1818     return user_bnodeid;
1819 
1820   id=++rdf_parser->default_generate_id_handler_base;
1821 
1822   tmpid=id;
1823   length=2; /* min length 1 + \0 */
1824   while(tmpid/=10)
1825     length++;
1826 
1827   if(rdf_parser->default_generate_id_handler_prefix)
1828     length += rdf_parser->default_generate_id_handler_prefix_length;
1829   else
1830     length += 5; /* genid */
1831 
1832   buffer=(unsigned char*)RAPTOR_MALLOC(cstring, length);
1833   if(!buffer)
1834     return NULL;
1835   if(rdf_parser->default_generate_id_handler_prefix) {
1836     strncpy((char*)buffer, rdf_parser->default_generate_id_handler_prefix,
1837             rdf_parser->default_generate_id_handler_prefix_length);
1838     sprintf((char*)buffer+rdf_parser->default_generate_id_handler_prefix_length,
1839             "%d", id);
1840   } else
1841     sprintf((char*)buffer, "genid%d", id);
1842 
1843   return buffer;
1844 }
1845 
1846 
1847 /**
1848  * raptor_parser_generate_id:
1849  * @rdf_parser: #raptor_parser parser object
1850  * @type: Type of ID to generate
1851  *
1852  * Generate an ID for a parser
1853  *
1854  * Type can be either RAPTOR_GENID_TYPE_BNODEID or
1855  * RAPTOR_GENID_TYPE_BAGID
1856  *
1857  * Return value: newly allocated generated ID or NULL on failure
1858  **/
1859 unsigned char*
raptor_parser_generate_id(raptor_parser * rdf_parser,raptor_genid_type type)1860 raptor_parser_generate_id(raptor_parser *rdf_parser, raptor_genid_type type)
1861 {
1862   if(type != RAPTOR_GENID_TYPE_BNODEID ||
1863      type != RAPTOR_GENID_TYPE_BAGID)
1864     return NULL;
1865 
1866   return raptor_parser_internal_generate_id(rdf_parser, type, NULL);
1867 }
1868 
1869 
1870 unsigned char*
raptor_parser_internal_generate_id(raptor_parser * rdf_parser,raptor_genid_type type,unsigned char * user_bnodeid)1871 raptor_parser_internal_generate_id(raptor_parser *rdf_parser,
1872                                    raptor_genid_type type,
1873                                    unsigned char *user_bnodeid)
1874 {
1875   if(rdf_parser->generate_id_handler)
1876     return rdf_parser->generate_id_handler(rdf_parser->generate_id_handler_user_data,
1877                                            type, user_bnodeid);
1878   else
1879     return raptor_default_generate_id_handler(rdf_parser, type, user_bnodeid);
1880 }
1881 
1882 
1883 /**
1884  * raptor_get_locator:
1885  * @rdf_parser: raptor parser
1886  *
1887  * Get the current raptor locator object.
1888  *
1889  * Return value: raptor locator
1890  **/
1891 raptor_locator*
raptor_get_locator(raptor_parser * rdf_parser)1892 raptor_get_locator(raptor_parser *rdf_parser)
1893 {
1894   return &rdf_parser->locator;
1895 }
1896 
1897 
#ifdef RAPTOR_DEBUG
/*
 * raptor_stats_print:
 * @rdf_parser: parser
 * @stream: FILE to write to
 *
 * Debug builds only - write parser statistics to @stream.
 *
 * Output is produced only when the RDF/XML parser is compiled in,
 * RAPTOR_DEBUG is greater than 1 and the parser's factory is named
 * "rdfxml"; in every other case the function does nothing.
 */
void
raptor_stats_print(raptor_parser *rdf_parser, FILE *stream)
{
#ifdef RAPTOR_PARSER_RDFXML
#if RAPTOR_DEBUG > 1
  if(strcmp(rdf_parser->factory->name, "rdfxml") == 0) {
    fputs("raptor parser stats\n  ", stream);
    raptor_rdfxml_parser_stats_print((raptor_rdfxml_parser*)rdf_parser->context,
                                     stream);
  }
#endif
#endif
}
#endif
1913 
1914 
1915 struct syntax_score
1916 {
1917   int score;
1918   raptor_parser_factory* factory;
1919 };
1920 
1921 
1922 static int
compare_syntax_score(const void * a,const void * b)1923 compare_syntax_score(const void *a, const void *b) {
1924   return ((struct syntax_score*)b)->score - ((struct syntax_score*)a)->score;
1925 }
1926 
1927 
1928 #ifndef RAPTOR_DISABLE_V1
1929 /**
1930  * raptor_guess_parser_name:
1931  * @uri: URI identifying the syntax (or NULL)
1932  * @mime_type: mime type identifying the content (or NULL)
1933  * @buffer: buffer of content to guess (or NULL)
1934  * @len: length of buffer
1935  * @identifier: identifier of content (or NULL)
1936  *
1937  * Guess a parser name for content.
1938  *
1939  * Find a parser by scoring recognition of the syntax by a block of
1940  * characters, the content identifier or a mime type.  The content
1941  * identifier is typically a filename or URI or some other identifier.
1942  *
1943  * raptor_init() MUST have been called before calling this function.
1944  * Use raptor_guess_parser_name_v2() if using raptor_world APIs.
1945  *
1946  * Return value: a parser name or NULL if no guess could be made
1947  **/
1948 const char*
raptor_guess_parser_name(raptor_uri * uri,const char * mime_type,const unsigned char * buffer,size_t len,const unsigned char * identifier)1949 raptor_guess_parser_name(raptor_uri *uri, const char *mime_type,
1950                          const unsigned char *buffer, size_t len,
1951                          const unsigned char *identifier)
1952 {
1953   return raptor_guess_parser_name_v2(raptor_world_instance(),
1954     uri, mime_type, buffer, len, identifier);
1955 }
1956 #endif
1957 
1958 
1959 /**
1960  * raptor_guess_parser_name_v2:
1961  * @world: raptor_world object
1962  * @uri: URI identifying the syntax (or NULL)
1963  * @mime_type: mime type identifying the content (or NULL)
1964  * @buffer: buffer of content to guess (or NULL)
1965  * @len: length of buffer
1966  * @identifier: identifier of content (or NULL)
1967  *
1968  * Guess a parser name for content.
1969  *
1970  * Find a parser by scoring recognition of the syntax by a block of
1971  * characters, the content identifier or a mime type.  The content
1972  * identifier is typically a filename or URI or some other identifier.
1973  *
1974  * Return value: a parser name or NULL if no guess could be made
1975  **/
1976 const char*
raptor_guess_parser_name_v2(raptor_world * world,raptor_uri * uri,const char * mime_type,const unsigned char * buffer,size_t len,const unsigned char * identifier)1977 raptor_guess_parser_name_v2(raptor_world* world,
1978                             raptor_uri *uri, const char *mime_type,
1979                             const unsigned char *buffer, size_t len,
1980                             const unsigned char *identifier)
1981 {
1982   unsigned int i;
1983   raptor_parser_factory *factory;
1984   unsigned char *suffix=NULL;
1985 /* FIXME - up to 10 parsers :) */
1986 #define MAX_PARSERS 10
1987   struct syntax_score scores[MAX_PARSERS];
1988 
1989   if(identifier) {
1990     unsigned char *p=(unsigned char*)strrchr((const char*)identifier, '.');
1991     if(p) {
1992       unsigned char *from, *to;
1993       p++;
1994       suffix=(unsigned char*)RAPTOR_MALLOC(cstring, strlen((const char*)p)+1);
1995       if(!suffix)
1996         return NULL;
1997       for(from=p, to=suffix; *from; ) {
1998         unsigned char c=*from++;
1999         /* discard the suffix if it wasn't '\.[a-zA-Z0-9]+$' */
2000         if(!isalpha(c) && !isdigit(c)) {
2001           RAPTOR_FREE(cstring, suffix);
2002           suffix=NULL;
2003           to=NULL;
2004           break;
2005         }
2006         *to++=isupper((char)c) ? (unsigned char)tolower((char)c): c;
2007       }
2008       if(to)
2009         *to='\0';
2010     }
2011   }
2012 
2013   for(i=0;
2014       (factory=(raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
2015       i++) {
2016     int score= -1;
2017     raptor_type_q* type_q=NULL;
2018 
2019     if(mime_type && factory->mime_types) {
2020       int j;
2021       type_q=NULL;
2022       for(j=0;
2023           (type_q=(raptor_type_q*)raptor_sequence_get_at(factory->mime_types, j));
2024           j++) {
2025         if(!strcmp(mime_type, type_q->mime_type))
2026           break;
2027       }
2028       /* got an exact match mime type - score it via the Q */
2029       if(type_q)
2030         score=type_q->q;
2031     }
2032     /* mime type match has high Q - return result */
2033     if(score >= 10)
2034       break;
2035 
2036     if(uri && factory->uri_string &&
2037        !strcmp((const char*)raptor_uri_as_string_v2(world, uri),
2038                (const char*)factory->uri_string))
2039       /* got an exact match syntax for URI - return result */
2040       break;
2041 
2042     if(factory->recognise_syntax) {
2043       int c= -1;
2044 
2045       /* Only use first N bytes to avoid HTML documents that contain
2046        * RDF/XML examples
2047        */
2048 #define FIRSTN 1024
2049       if(buffer && len && len > FIRSTN) {
2050         c=buffer[FIRSTN];
2051         ((char*)buffer)[FIRSTN]='\0';
2052       }
2053 
2054       score += factory->recognise_syntax(factory, buffer, len,
2055                                          identifier, suffix,
2056                                          mime_type);
2057 
2058       if(c >= 0)
2059         ((char*)buffer)[FIRSTN]=c;
2060     }
2061 
2062     if(i > MAX_PARSERS) {
2063       RAPTOR_DEBUG2("Number of parsers greater than static buffer size %d\n",
2064                     MAX_PARSERS);
2065       if(suffix)
2066         RAPTOR_FREE(cstring, suffix);
2067       return NULL;
2068     }
2069 
2070     scores[i].score=score < 10 ? score : 10; scores[i].factory=factory;
2071 #if RAPTOR_DEBUG > 2
2072     RAPTOR_DEBUG3("Score %15s : %d\n", factory->name, score);
2073 #endif
2074   }
2075 
2076   if(!factory) {
2077     /* sort the scores and pick a factory */
2078     qsort(scores, i, sizeof(struct syntax_score), compare_syntax_score);
2079     if(scores[0].score >= 0)
2080       factory=scores[0].factory;
2081   }
2082 
2083   if(suffix)
2084     RAPTOR_FREE(cstring, suffix);
2085 
2086   return factory ? factory->name : NULL;
2087 }
2088 
2089 
2090 /*
2091  * raptor_parser_copy_user_state:
2092  * @to_parser: destination parser
2093  * @from_parser: source parser
2094  *
2095  * Copy user state between parsers - INTERNAL.
2096  *
2097  * Return value: non-0 on failure
2098  **/
2099 int
raptor_parser_copy_user_state(raptor_parser * to_parser,raptor_parser * from_parser)2100 raptor_parser_copy_user_state(raptor_parser *to_parser,
2101                               raptor_parser *from_parser)
2102 {
2103   int rc=0;
2104   int i;
2105 
2106   to_parser->user_data= from_parser->user_data;
2107   memcpy(&to_parser->error_handlers, &from_parser->error_handlers,
2108          sizeof(raptor_error_handlers));
2109   to_parser->statement_handler= from_parser->statement_handler;
2110   to_parser->generate_id_handler_user_data= from_parser->generate_id_handler_user_data;
2111   to_parser->generate_id_handler= from_parser->generate_id_handler;
2112   to_parser->default_generate_id_handler_base= from_parser->default_generate_id_handler_base;
2113   /* copy over non-shared user state - generate ID prefix string */
2114   if(from_parser->default_generate_id_handler_prefix) {
2115     size_t len=from_parser->default_generate_id_handler_prefix_length;
2116     to_parser->default_generate_id_handler_prefix=(char*)RAPTOR_MALLOC(cstring, len+1);
2117     if(to_parser->default_generate_id_handler_prefix)
2118       strncpy((char*)to_parser->default_generate_id_handler_prefix,
2119               (const char*)from_parser->default_generate_id_handler_prefix,
2120               len+1);
2121     else
2122       rc=1;
2123   }
2124   to_parser->default_generate_id_handler_prefix_length= from_parser->default_generate_id_handler_prefix_length;
2125   to_parser->namespace_handler= from_parser->namespace_handler;
2126   to_parser->namespace_handler_user_data= from_parser->namespace_handler_user_data;
2127   to_parser->uri_filter= from_parser->uri_filter;
2128   to_parser->uri_filter_user_data= from_parser->uri_filter_user_data;
2129 
2130   /* copy over Cache-Control: header */
2131   if(!rc && from_parser->cache_control) {
2132     size_t len=strlen(from_parser->cache_control);
2133     to_parser->cache_control=(char*)RAPTOR_MALLOC(cstring, len+1);
2134     if(to_parser->cache_control)
2135       strncpy((char*)to_parser->cache_control,
2136               (const char*)from_parser->cache_control,
2137               len+1);
2138     else
2139       rc=1;
2140   }
2141 
2142   /* copy over User-Agent: header */
2143   if(!rc && from_parser->user_agent) {
2144     size_t len=strlen(from_parser->user_agent);
2145     to_parser->user_agent=(char*)RAPTOR_MALLOC(cstring, len+1);
2146     if(to_parser->user_agent)
2147       strncpy((char*)to_parser->user_agent,
2148               (const char*)from_parser->user_agent,
2149               len+1);
2150     else
2151       rc=1;
2152   }
2153 
2154   /* copy features */
2155   for(i=0; i<= RAPTOR_FEATURE_LAST; i++)
2156     to_parser->features[i]= from_parser->features[i];
2157 
2158   return rc;
2159 }
2160 
2161 
2162 /*
2163  * raptor_parser_start_namespace:
2164  * @rdf_parser: parser
2165  * @nspace: namespace starting
2166  *
2167  * Internal - Invoke start namespace handler
2168  **/
2169 void
raptor_parser_start_namespace(raptor_parser * rdf_parser,raptor_namespace * nspace)2170 raptor_parser_start_namespace(raptor_parser* rdf_parser,
2171                               raptor_namespace* nspace)
2172 {
2173   if(!rdf_parser->namespace_handler)
2174     return;
2175 
2176   (*rdf_parser->namespace_handler)(rdf_parser->namespace_handler_user_data,
2177                                    nspace);
2178 }
2179 
2180 
2181 /**
2182  * raptor_parser_get_accept_header:
2183  * @rdf_parser: parser
2184  *
2185  * Get an HTTP Accept value for the parser.
2186  *
2187  * The returned string must be freed by the caller such as with
2188  * raptor_free_memory().
2189  *
2190  * Return value: a new Accept: header string or NULL on failure
2191  **/
2192 const char*
raptor_parser_get_accept_header(raptor_parser * rdf_parser)2193 raptor_parser_get_accept_header(raptor_parser* rdf_parser)
2194 {
2195   raptor_parser_factory *factory=rdf_parser->factory;
2196   char *accept_header=NULL;
2197   size_t len;
2198   char *p;
2199   int i;
2200   raptor_type_q* type_q;
2201 
2202   if(factory->accept_header)
2203     return factory->accept_header(rdf_parser);
2204 
2205   if(!factory->mime_types)
2206     return NULL;
2207 
2208   len=0;
2209   for(i=0;
2210       (type_q=(raptor_type_q*)raptor_sequence_get_at(factory->mime_types, i));
2211       i++) {
2212     if(type_q->mime_type) {
2213       len+= type_q->mime_type_len + 2; /* ", " */
2214       if(type_q->q < 10)
2215         len+= 6; /* ";q=X.Y" */
2216     }
2217   }
2218 
2219   /* 9 = "\*\/\*;q=0.1" */
2220   accept_header=(char*)RAPTOR_MALLOC(cstring, len + 9 + 1);
2221   if(!accept_header)
2222     return NULL;
2223 
2224   p=accept_header;
2225   for(i=0;
2226       (type_q=(raptor_type_q*)raptor_sequence_get_at(factory->mime_types, i));
2227       i++) {
2228     if(type_q->mime_type) {
2229       strncpy(p, type_q->mime_type, type_q->mime_type_len);
2230       p+= type_q->mime_type_len;
2231       if(type_q->q < 10) {
2232         *p++ = ';';
2233         *p++ = 'q';
2234         *p++ = '=';
2235         *p++ = '0';
2236         *p++ = '.';
2237         *p++ = '0' + (type_q->q);
2238       }
2239     }
2240 
2241     *p++ = ',';
2242     *p++ = ' ';
2243   }
2244 
2245   strncpy(p, "*/*;q=0.1", 10);
2246 
2247   return accept_header;
2248 }
2249 
2250 
2251 const char*
raptor_parser_get_accept_header_all(raptor_world * world)2252 raptor_parser_get_accept_header_all(raptor_world* world)
2253 {
2254   raptor_parser_factory *factory;
2255   char *accept_header=NULL;
2256   size_t len;
2257   char *p;
2258   int i;
2259 
2260   len=0;
2261   for(i=0;
2262       (factory=(raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
2263       i++) {
2264     raptor_type_q* type_q;
2265     int j;
2266 
2267     for(j=0;
2268         (type_q=(raptor_type_q*)raptor_sequence_get_at(factory->mime_types, j));
2269         j++) {
2270       if(type_q->mime_type) {
2271         len+= type_q->mime_type_len + 2; /* ", " */
2272         if(type_q->q < 10)
2273           len+= 6; /* ";q=X.Y" */
2274       }
2275     }
2276   }
2277 
2278   /* 9 = "\*\/\*;q=0.1" */
2279   accept_header=(char*)RAPTOR_MALLOC(cstring, len + 9 + 1);
2280   if(!accept_header)
2281     return NULL;
2282 
2283   p=accept_header;
2284   for(i=0;
2285       (factory=(raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
2286       i++) {
2287     raptor_type_q* type_q;
2288     int j;
2289 
2290     for(j=0;
2291         (type_q=(raptor_type_q*)raptor_sequence_get_at(factory->mime_types, j));
2292         j++) {
2293       if(type_q->mime_type) {
2294         strncpy(p, type_q->mime_type, type_q->mime_type_len);
2295         p+= type_q->mime_type_len;
2296         if(type_q->q < 10) {
2297           *p++ = ';';
2298           *p++ = 'q';
2299           *p++ = '=';
2300           *p++ = '0';
2301           *p++ = '.';
2302           *p++ = '0' + (type_q->q);
2303         }
2304       }
2305 
2306       *p++ = ',';
2307       *p++ = ' ';
2308     }
2309 
2310   }
2311 
2312   strncpy(p, "*/*;q=0.1", 10);
2313 
2314   return accept_header;
2315 }
2316 
2317 
2318 void
raptor_parser_save_content(raptor_parser * rdf_parser,int save)2319 raptor_parser_save_content(raptor_parser* rdf_parser, int save)
2320 {
2321   if(rdf_parser->sb)
2322     raptor_free_stringbuffer(rdf_parser->sb);
2323 
2324   rdf_parser->sb= save ? raptor_new_stringbuffer() : NULL;
2325 }
2326 
2327 
2328 const unsigned char*
raptor_parser_get_content(raptor_parser * rdf_parser,size_t * length_p)2329 raptor_parser_get_content(raptor_parser* rdf_parser, size_t* length_p)
2330 {
2331   unsigned char* buffer;
2332   size_t len;
2333 
2334   if(!rdf_parser->sb)
2335     return NULL;
2336 
2337   len=raptor_stringbuffer_length(rdf_parser->sb);
2338   buffer=(unsigned char*)RAPTOR_MALLOC(cstring, len+1);
2339   if(!buffer)
2340     return NULL;
2341 
2342   raptor_stringbuffer_copy_to_string(rdf_parser->sb, buffer, len);
2343 
2344   if(length_p)
2345     *length_p=len;
2346 
2347   return buffer;
2348 }
2349 
2350 
2351 void
raptor_parser_set_graph_name(raptor_parser * parser,raptor_uri * uri)2352 raptor_parser_set_graph_name(raptor_parser* parser, raptor_uri* uri)
2353 {
2354   if(parser->graph_handler)
2355     (*parser->graph_handler)(parser->user_data, uri);
2356 }
2357 
2358 
2359 int
raptor_parser_get_current_base_id(raptor_parser * parser)2360 raptor_parser_get_current_base_id(raptor_parser* parser)
2361 {
2362   if(parser->factory->get_current_base_id)
2363     return parser->factory->get_current_base_id(parser);
2364   else
2365     return parser->default_generate_id_handler_base;
2366 }
2367 
2368 
2369 /**
2370  * raptor_parser_get_world:
2371  * @rdf_parser: parser
2372  *
2373  * Get the #raptor_world object associated with a parser.
2374  *
2375  * Return value: raptor_world* pointer
2376  **/
2377 raptor_world *
raptor_parser_get_world(raptor_parser * rdf_parser)2378 raptor_parser_get_world(raptor_parser* rdf_parser)
2379 {
2380   return rdf_parser->world;
2381 }
2382 
2383 
2384 /* end not STANDALONE */
2385 #endif
2386 
2387 
2388 #ifdef STANDALONE
2389 #include <stdio.h>
2390 
2391 int main(int argc, char *argv[]);
2392 
2393 
2394 int
main(int argc,char * argv[])2395 main(int argc, char *argv[])
2396 {
2397   raptor_world *world;
2398 #ifdef RAPTOR_DEBUG
2399   const char *program=raptor_basename(argv[0]);
2400 #endif
2401   int i;
2402   const char *s;
2403 
2404   world = raptor_new_world();
2405   if(!world || raptor_world_open(world))
2406     exit(1);
2407 
2408 #ifdef RAPTOR_DEBUG
2409   fprintf(stderr, "%s: Known features:\n", program);
2410 #endif
2411 
2412   for(i=0; i <= RAPTOR_FEATURE_LAST; i++) {
2413     const char *feature_name;
2414     const char *feature_label;
2415     raptor_uri *feature_uri;
2416     int fn;
2417 
2418     if(raptor_features_enumerate_v2(world, (raptor_feature)i,
2419                                     &feature_name, &feature_uri, &feature_label))
2420       continue;
2421 
2422 #ifdef RAPTOR_DEBUG
2423     fprintf(stderr, " %2d %-20s %s\n", i, feature_name, feature_label);
2424 #endif
2425     fn=raptor_feature_from_uri_v2(world, feature_uri);
2426     if(fn != i) {
2427       fprintf(stderr, "raptor_feature_from_uri returned %d expected %d\n", fn, i);
2428       return 1;
2429     }
2430     raptor_free_uri_v2(world, feature_uri);
2431   }
2432 
2433   s=raptor_parser_get_accept_header_all(world);
2434   fprintf(stderr, "Default HTTP accept header: '%s'\n", s);
2435   RAPTOR_FREE(cstring, s);
2436 
2437   raptor_free_world(world);
2438 
2439   return 0;
2440 }
2441 
2442 #endif
2443