1 /* -*- Mode: c; c-basic-offset: 2 -*-
2 *
3 * raptor_parse.c - Raptor Parser API
4 *
5 * Copyright (C) 2000-2009, David Beckett http://www.dajobe.org/
6 * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
7 *
8 * This package is Free Software and part of Redland http://librdf.org/
9 *
10 * It is licensed under the following three licenses as alternatives:
11 * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
12 * 2. GNU General Public License (GPL) V2 or any newer version
13 * 3. Apache License, V2.0 or any newer version
14 *
15 * You may not use this file except in compliance with at least one of
16 * the above three licenses.
17 *
18 * See LICENSE.html or LICENSE.txt at the top of this package for the
19 * complete terms and further detail along with the license texts for
20 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
21 *
22 *
23 */
24
25
26 #ifdef HAVE_CONFIG_H
27 #include <raptor_config.h>
28 #endif
29
30 #ifdef WIN32
31 #include <win32_raptor_config.h>
32 #endif
33
34
35 #include <stdio.h>
36 #include <string.h>
37 #include <ctype.h>
38 #include <stdarg.h>
39 #ifdef HAVE_ERRNO_H
40 #include <errno.h>
41 #endif
42 #ifdef HAVE_STDLIB_H
43 #include <stdlib.h>
44 #endif
45 #ifdef HAVE_SYS_STAT_H
46 #include <sys/stat.h>
47 #endif
48 #ifdef HAVE_FCNTL_H
49 #include <fcntl.h>
50 #endif
51
52 /* Raptor includes */
53 #include "raptor.h"
54 #include "raptor_internal.h"
55
56
57 #ifndef STANDALONE
58
59 /* prototypes for helper functions */
60 static void raptor_free_type_q(raptor_type_q* type_q);
61
62
63 /* helper methods */
64
65 static void
raptor_free_parser_factory(raptor_parser_factory * factory)66 raptor_free_parser_factory(raptor_parser_factory* factory)
67 {
68 RAPTOR_ASSERT_OBJECT_POINTER_RETURN(factory, raptor_parser_factory);
69
70 if(factory->finish_factory)
71 factory->finish_factory(factory);
72
73 if(factory->name)
74 RAPTOR_FREE(raptor_parser_factory, (void*)factory->name);
75 if(factory->label)
76 RAPTOR_FREE(raptor_parser_factory, (void*)factory->label);
77 if(factory->alias)
78 RAPTOR_FREE(raptor_parser_factory, (void*)factory->alias);
79 if(factory->mime_types)
80 raptor_free_sequence(factory->mime_types);
81 if(factory->uri_string)
82 RAPTOR_FREE(raptor_parser_factory, (void*)factory->uri_string);
83
84 RAPTOR_FREE(raptor_parser_factory, factory);
85 }
86
87
88 /* class methods */
89
90 int
raptor_parsers_init(raptor_world * world)91 raptor_parsers_init(raptor_world *world)
92 {
93 int rc=0;
94
95 world->parsers=raptor_new_sequence((raptor_sequence_free_handler *)raptor_free_parser_factory, NULL);
96 if(!world->parsers)
97 return 1;
98
99 #ifdef RAPTOR_PARSER_RDFXML
100 rc+= raptor_init_parser_rdfxml(world) != 0;
101 #endif
102
103 #ifdef RAPTOR_PARSER_NTRIPLES
104 rc+= raptor_init_parser_ntriples(world) != 0;
105 #endif
106
107 #ifdef RAPTOR_PARSER_N3
108 rc+= raptor_init_parser_n3(world) != 0;
109 #endif
110
111 #ifdef RAPTOR_PARSER_TURTLE
112 rc+= raptor_init_parser_turtle(world) != 0;
113 #endif
114
115 #ifdef RAPTOR_PARSER_TRIG
116 rc+= raptor_init_parser_trig(world) != 0;
117 #endif
118
119 #ifdef RAPTOR_PARSER_RSS
120 rc+= raptor_init_parser_rss(world) != 0;
121 #endif
122
123 #if defined(RAPTOR_PARSER_GRDDL)
124 rc+= raptor_init_parser_grddl_common(world) != 0;
125
126 #ifdef RAPTOR_PARSER_GRDDL
127 rc+= raptor_init_parser_grddl(world) != 0;
128 #endif
129
130 #endif
131
132 #ifdef RAPTOR_PARSER_GUESS
133 rc+= raptor_init_parser_guess(world) != 0;
134 #endif
135
136 #ifdef RAPTOR_PARSER_RDFA
137 rc+= raptor_init_parser_rdfa(world) != 0;
138 #endif
139
140 return rc;
141 }
142
143
144 /*
145 * raptor_finish_parsers - delete all the registered parsers
146 */
147 void
raptor_parsers_finish(raptor_world * world)148 raptor_parsers_finish(raptor_world *world)
149 {
150 if(world->parsers) {
151 raptor_free_sequence(world->parsers);
152 world->parsers=NULL;
153 }
154 #if defined(RAPTOR_PARSER_GRDDL)
155 raptor_terminate_parser_grddl_common(world);
156 #endif
157 }
158
159
160 /*
161 * raptor_parser_register_factory:
162 * @name: the short syntax name
163 * @label: readable label for syntax
164 * @mime_type: MIME type of the syntax handled by the parser (or NULL)
165 * @uri_string: URI string of the syntax (or NULL)
166 * @factory: pointer to function to call to register the factory
167 *
168 * Register a syntax handled by a parser factory.
169 *
170 * INTERNAL
171 *
172 **/
173 RAPTOR_EXTERN_C
174 raptor_parser_factory*
raptor_parser_register_factory(raptor_world * world,const char * name,const char * label,int (* factory)(raptor_parser_factory *))175 raptor_parser_register_factory(raptor_world* world,
176 const char *name, const char *label,
177 int (*factory) (raptor_parser_factory*))
178 {
179 raptor_parser_factory *parser=NULL;
180 raptor_parser_factory *h;
181 char *name_copy, *label_copy;
182 int i;
183
184 #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
185 RAPTOR_DEBUG3("Received registration for syntax %s '%s'\n", name, label);
186 #endif
187
188 for(i=0;
189 (h=(raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
190 i++) {
191 if(!strcmp(h->name, name)) {
192 RAPTOR_DEBUG2("parser %s already registered\n", h->name);
193 return NULL;
194 }
195 }
196
197 parser=(raptor_parser_factory*)RAPTOR_CALLOC(raptor_parser_factory, 1,
198 sizeof(raptor_parser_factory));
199 if(!parser)
200 return NULL;
201
202 parser->world=world;
203
204 name_copy=(char*)RAPTOR_CALLOC(cstring, strlen(name)+1, 1);
205 if(!name_copy)
206 goto tidy;
207 strcpy(name_copy, name);
208 parser->name=name_copy;
209
210 label_copy=(char*)RAPTOR_CALLOC(cstring, strlen(label)+1, 1);
211 if(!label_copy)
212 goto tidy;
213 strcpy(label_copy, label);
214 parser->label=label_copy;
215
216 parser->mime_types=raptor_new_sequence((raptor_sequence_free_handler*)raptor_free_type_q, NULL);
217 if(!parser->mime_types)
218 goto tidy;
219
220 if(raptor_sequence_push(world->parsers, parser))
221 return NULL; /* on error, parser is already freed by the sequence */
222
223 /* Call the parser registration function on the new object */
224 if (factory(parser))
225 return NULL; /* parser is owned and freed by the parsers sequence */
226
227 #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
228 RAPTOR_DEBUG3("%s has context size %d\n", name, parser->context_length);
229 #endif
230
231 return parser;
232
233 /* Clean up on failure */
234 tidy:
235 raptor_free_parser_factory(parser);
236 return NULL;
237 }
238
239
240 int
raptor_parser_factory_add_alias(raptor_parser_factory * factory,const char * alias)241 raptor_parser_factory_add_alias(raptor_parser_factory* factory,
242 const char *alias)
243 {
244 raptor_parser_factory *p;
245 char *alias_copy;
246 int i;
247
248 for(i=0;
249 (p=(raptor_parser_factory*)raptor_sequence_get_at(factory->world->parsers, i));
250 i++) {
251 if(!strcmp(p->name, alias)) {
252 RAPTOR_DEBUG2("parser %s already registered\n", p->name);
253 return 1;
254 }
255 }
256
257 alias_copy=(char*)RAPTOR_CALLOC(cstring, strlen(alias)+1, 1);
258 if(!alias_copy)
259 return 1;
260 strcpy(alias_copy, alias);
261 factory->alias=alias_copy;
262
263 return 0;
264 }
265
266
267 static void
raptor_free_type_q(raptor_type_q * type_q)268 raptor_free_type_q(raptor_type_q* type_q)
269 {
270 RAPTOR_FREE(cstring, (void*)type_q->mime_type);
271 RAPTOR_FREE(raptor_type_q, (void*)type_q);
272 }
273
274
275 /**
276 * raptor_parser_factory_add_mime_type:
277 * @factory: Raptor parser factory
278 * @mime_type: MIME Type string
279 * @q: Accept 'Q' value 0 to 10 inclusive representing 0.0 to 1.0
280 *
281 * Register a MIME type as handled by a factory.
282 *
283 * The FIRST added MIME type is the default or main one reported.
284 *
285 * Return value: non-0 on failure
286 *
287 **/
288 int
raptor_parser_factory_add_mime_type(raptor_parser_factory * factory,const char * mime_type,int q)289 raptor_parser_factory_add_mime_type(raptor_parser_factory* factory,
290 const char* mime_type, int q)
291 {
292 raptor_type_q* type_q;
293 char* mime_type_copy;
294 size_t len;
295
296 type_q=(raptor_type_q*)RAPTOR_CALLOC(raptor_type_q, sizeof(raptor_type_q), 1);
297 if(!type_q)
298 return 1;
299 len=strlen(mime_type);
300 mime_type_copy=(char*)RAPTOR_CALLOC(cstring, len+1, 1);
301 if(!mime_type_copy) {
302 raptor_free_type_q(type_q);
303 return 1;
304 }
305 strcpy(mime_type_copy, mime_type);
306
307 type_q->mime_type=mime_type_copy;
308 type_q->mime_type_len=len;
309
310 if(q<0)
311 q=0;
312 if(q>10)
313 q=10;
314 type_q->q=q;
315
316 return raptor_sequence_push(factory->mime_types, type_q);
317 }
318
319
320 /**
321 * raptor_parser_factory_add_uri:
322 * @factory: Raptor parser factory
323 * @uri_string: URI string
324 *
325 * Register an identifying URI as handled by a factory.
326 *
327 * Return value: non-0 on failure
328 **/
329 int
raptor_parser_factory_add_uri(raptor_parser_factory * factory,const unsigned char * uri_string)330 raptor_parser_factory_add_uri(raptor_parser_factory* factory,
331 const unsigned char *uri_string)
332 {
333 unsigned char *uri_string_copy;
334
335 if(!uri_string)
336 return 1;
337
338 uri_string_copy=(unsigned char*)RAPTOR_CALLOC(cstring, strlen((const char*)uri_string)+1, 1);
339 if(!uri_string_copy)
340 return 1;
341
342 strcpy((char*)uri_string_copy, (const char*)uri_string);
343 factory->uri_string=uri_string_copy;
344
345 return 0;
346 }
347
348
349 /**
350 * raptor_get_parser_factory:
351 * @world: raptor_world object
352 * @name: the factory name or NULL for the default factory
353 *
354 * Get a parser factory by name.
355 *
356 * Return value: the factory object or NULL if there is no such factory
357 **/
358 raptor_parser_factory*
raptor_get_parser_factory(raptor_world * world,const char * name)359 raptor_get_parser_factory(raptor_world *world, const char *name)
360 {
361 raptor_parser_factory *factory;
362
363 /* return 1st parser if no particular one wanted - why? */
364 if(!name) {
365 factory=(raptor_parser_factory *)raptor_sequence_get_at(world->parsers, 0);
366 if(!factory) {
367 RAPTOR_DEBUG1("No (default) parsers registered\n");
368 return NULL;
369 }
370 } else {
371 int i;
372
373 for(i=0;
374 (factory=(raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
375 i++) {
376 if(!strcmp(factory->name, name) ||
377 (factory->alias && !strcmp(factory->alias, name)))
378 break;
379 }
380 /* else FACTORY name not found */
381 if(!factory) {
382 RAPTOR_DEBUG2("No parser with name %s found\n", name);
383 return NULL;
384 }
385 }
386
387 return factory;
388 }
389
390
391 #ifndef RAPTOR_DISABLE_V1
/**
 * raptor_syntaxes_enumerate:
 * @counter: index into the list of syntaxes
 * @name: pointer to store the name of the syntax (or NULL)
 * @label: pointer to store syntax readable label (or NULL)
 * @mime_type: pointer to store syntax MIME Type (or NULL)
 * @uri_string: pointer to store syntax URI string (or NULL)
 *
 * Get information on syntaxes.
 *
 * Forwards to raptor_syntaxes_enumerate_v2() with the world returned by
 * raptor_world_instance().
 *
 * raptor_init() MUST have been called before calling this function.
 * Use raptor_syntaxes_enumerate_v2() if using raptor_world APIs.
 *
 * Return value: non 0 on failure or if counter is out of range
 **/
int
raptor_syntaxes_enumerate(const unsigned int counter,
                          const char **name, const char **label,
                          const char **mime_type,
                          const unsigned char **uri_string)
{
  int status;

  status=raptor_syntaxes_enumerate_v2(raptor_world_instance(),
                                      counter, name, label,
                                      mime_type, uri_string);
  return status;
}
416 #endif
417
418
419 /**
420 * raptor_syntaxes_enumerate_v2:
421 * @world: raptor_world object
422 * @counter: index into the list of syntaxes
423 * @name: pointer to store the name of the syntax (or NULL)
424 * @label: pointer to store syntax readable label (or NULL)
425 * @mime_type: pointer to store syntax MIME Type (or NULL)
426 * @uri_string: pointer to store syntax URI string (or NULL)
427 *
428 * Get information on syntaxes.
429 *
430 * Return value: non 0 on failure of if counter is out of range
431 **/
432 int
raptor_syntaxes_enumerate_v2(raptor_world * world,const unsigned int counter,const char ** name,const char ** label,const char ** mime_type,const unsigned char ** uri_string)433 raptor_syntaxes_enumerate_v2(raptor_world* world,
434 const unsigned int counter,
435 const char **name, const char **label,
436 const char **mime_type,
437 const unsigned char **uri_string)
438 {
439 raptor_parser_factory *factory;
440
441 factory=(raptor_parser_factory*)raptor_sequence_get_at(world->parsers,
442 counter);
443
444 if(!factory)
445 return 1;
446
447 if(name)
448 *name=factory->name;
449 if(label)
450 *label=factory->label;
451 if(mime_type) {
452 const char *mime_type_t=NULL;
453 if(factory->mime_types) {
454 raptor_type_q* tq;
455 tq=(raptor_type_q*)raptor_sequence_get_at(factory->mime_types, 0);
456 if(tq)
457 mime_type_t=tq->mime_type;
458 }
459 *mime_type=mime_type_t;
460 }
461 if(uri_string)
462 *uri_string=factory->uri_string;
463 return 0;
464 }
465
466
467 #ifndef RAPTOR_DISABLE_V1
/**
 * raptor_parsers_enumerate:
 * @counter: index to list of parsers
 * @name: pointer to store syntax name (or NULL)
 * @label: pointer to store syntax label (or NULL)
 *
 * Get list of syntax parsers.
 *
 * Equivalent to raptor_syntaxes_enumerate() with no MIME type or URI
 * string requested.
 *
 * Return value: non 0 on failure or if counter is out of range
 **/
int
raptor_parsers_enumerate(const unsigned int counter,
                         const char **name, const char **label)
{
  int status;

  status=raptor_syntaxes_enumerate(counter, name, label, NULL, NULL);
  return status;
}
484
485
/**
 * raptor_syntax_name_check:
 * @name: the syntax name
 *
 * Check name of a parser.
 *
 * Forwards to raptor_syntax_name_check_v2() with the world returned by
 * raptor_world_instance().
 *
 * raptor_init() MUST have been called before calling this function.
 * Use raptor_syntax_name_check_v2() if using raptor_world APIs.
 *
 * Return value: non 0 if name is a known syntax name
 */
int
raptor_syntax_name_check(const char *name)
{
  int known;

  known=raptor_syntax_name_check_v2(raptor_world_instance(), name);
  return known;
}
501 #endif
502
503
504 /**
505 * raptor_syntax_name_check_v2:
506 * @world: raptor_world object
507 * @name: the syntax name
508 *
509 * Check name of a parser.
510 *
511 * Return value: non 0 if name is a known syntax name
512 */
513 int
raptor_syntax_name_check_v2(raptor_world * world,const char * name)514 raptor_syntax_name_check_v2(raptor_world* world, const char *name) {
515 return (raptor_get_parser_factory(world, name) != NULL);
516 }
517
518
519 #ifndef RAPTOR_DISABLE_V1
520 /**
521 * raptor_new_parser:
522 * @name: the parser name
523 *
524 * Constructor - create a new raptor_parser object.
525 *
526 * raptor_init() MUST have been called before calling this function.
527 * Use raptor_new_parser_v2() if using raptor_world APIs.
528 *
529 * Return value: a new #raptor_parser object or NULL on failure
530 */
531 raptor_parser*
raptor_new_parser(const char * name)532 raptor_new_parser(const char *name) {
533 return raptor_new_parser_v2(raptor_world_instance(), name);
534 }
535 #endif
536
537
538 /**
539 * raptor_new_parser_v2:
540 * @world: raptor_world object
541 * @name: the parser name
542 *
543 * Constructor - create a new raptor_parser object.
544 *
545 * Return value: a new #raptor_parser object or NULL on failure
546 */
547 raptor_parser*
raptor_new_parser_v2(raptor_world * world,const char * name)548 raptor_new_parser_v2(raptor_world* world, const char *name) {
549 raptor_parser_factory* factory;
550 raptor_parser* rdf_parser;
551
552 factory=raptor_get_parser_factory(world, name);
553 if(!factory)
554 return NULL;
555
556 rdf_parser=(raptor_parser*)RAPTOR_CALLOC(raptor_parser, 1,
557 sizeof(raptor_parser));
558 if(!rdf_parser)
559 return NULL;
560
561 rdf_parser->world=world;
562
563 rdf_parser->context=(char*)RAPTOR_CALLOC(raptor_parser_context, 1,
564 factory->context_length);
565 if(!rdf_parser->context) {
566 raptor_free_parser(rdf_parser);
567 return NULL;
568 }
569
570 #ifdef RAPTOR_XML_LIBXML
571 rdf_parser->magic=RAPTOR_LIBXML_MAGIC;
572 #endif
573 rdf_parser->factory=factory;
574
575 rdf_parser->failed=0;
576
577 rdf_parser->error_handlers.locator=&rdf_parser->locator;
578 rdf_parser->error_handlers.last_log_level=RAPTOR_LOG_LEVEL_LAST;
579 raptor_error_handlers_init_v2(rdf_parser->world, &rdf_parser->error_handlers);
580
581 /* Initialise default (lax) feature values */
582 raptor_set_parser_strict(rdf_parser, 0);
583
584 if(factory->init(rdf_parser, name)) {
585 raptor_free_parser(rdf_parser);
586 return NULL;
587 }
588
589 return rdf_parser;
590 }
591
592
593 #ifndef RAPTOR_DISABLE_V1
594 /**
595 * raptor_new_parser_for_content:
596 * @uri: URI identifying the syntax (or NULL)
597 * @mime_type: mime type identifying the content (or NULL)
598 * @buffer: buffer of content to guess (or NULL)
599 * @len: length of buffer
600 * @identifier: identifier of content (or NULL)
601 *
602 * Constructor - create a new raptor_parser.
603 *
604 * Uses raptor_guess_parser_name() to find a parser by scoring
605 * recognition of the syntax by a block of characters, the content
606 * identifier or a mime type. The content identifier is typically a
607 * filename or URI or some other identifier.
608 *
609 * raptor_init() MUST have been called before calling this function.
610 * Use raptor_new_parser_for_content_v2() if using raptor_world APIs.
611 *
612 * Return value: a new #raptor_parser object or NULL on failure
613 **/
614 raptor_parser*
raptor_new_parser_for_content(raptor_uri * uri,const char * mime_type,const unsigned char * buffer,size_t len,const unsigned char * identifier)615 raptor_new_parser_for_content(raptor_uri *uri, const char *mime_type,
616 const unsigned char *buffer, size_t len,
617 const unsigned char *identifier)
618 {
619 return raptor_new_parser_for_content_v2(raptor_world_instance(),
620 uri, mime_type,
621 buffer, len,
622 identifier);
623 }
624 #endif
625
626
627 /**
628 * raptor_new_parser_for_content_v2:
629 * @world: raptor_world object
630 * @uri: URI identifying the syntax (or NULL)
631 * @mime_type: mime type identifying the content (or NULL)
632 * @buffer: buffer of content to guess (or NULL)
633 * @len: length of buffer
634 * @identifier: identifier of content (or NULL)
635 *
636 * Constructor - create a new raptor_parser.
637 *
638 * Uses raptor_guess_parser_name() to find a parser by scoring
639 * recognition of the syntax by a block of characters, the content
640 * identifier or a mime type. The content identifier is typically a
641 * filename or URI or some other identifier.
642 *
643 * Return value: a new #raptor_parser object or NULL on failure
644 **/
645 raptor_parser*
raptor_new_parser_for_content_v2(raptor_world * world,raptor_uri * uri,const char * mime_type,const unsigned char * buffer,size_t len,const unsigned char * identifier)646 raptor_new_parser_for_content_v2(raptor_world* world,
647 raptor_uri *uri, const char *mime_type,
648 const unsigned char *buffer, size_t len,
649 const unsigned char *identifier)
650 {
651 return raptor_new_parser_v2(world,
652 raptor_guess_parser_name_v2(world, uri, mime_type, buffer, len, identifier));
653 }
654
655
656 /**
657 * raptor_start_parse:
658 * @rdf_parser: RDF parser
659 * @uri: base URI or may be NULL if no base URI is required
660 *
661 * Start a parse of content with base URI.
662 *
663 * Parsers that need a base URI can be tested with raptor_get_need_base_uri().
664 *
665 * Return value: non-0 on failure, <0 if a required base URI was missing
666 **/
667 int
raptor_start_parse(raptor_parser * rdf_parser,raptor_uri * uri)668 raptor_start_parse(raptor_parser *rdf_parser, raptor_uri *uri)
669 {
670 if(rdf_parser->factory->need_base_uri && !uri) {
671 raptor_parser_error(rdf_parser, "Missing base URI for %s parser.",
672 rdf_parser->factory->name);
673 return -1;
674 }
675
676 if(uri)
677 uri=raptor_uri_copy_v2(rdf_parser->world, uri);
678
679 if(rdf_parser->base_uri)
680 raptor_free_uri_v2(rdf_parser->world, rdf_parser->base_uri);
681 rdf_parser->base_uri=uri;
682
683 rdf_parser->locator.uri = uri;
684 rdf_parser->locator.line = -1;
685 rdf_parser->locator.column = -1;
686 rdf_parser->locator.byte = -1;
687
688 if(rdf_parser->factory->start)
689 return rdf_parser->factory->start(rdf_parser);
690 else
691 return 0;
692 }
693
694
695
696
697 /**
698 * raptor_parse_chunk:
699 * @rdf_parser: RDF parser
700 * @buffer: content to parse
701 * @len: length of buffer
702 * @is_end: non-0 if this is the end of the content (such as EOF)
703 *
704 * Parse a block of content into triples.
705 *
706 * This method can only be called after raptor_start_parse has
707 * initialised the parser.
708 *
709 * Return value: non-0 on failure.
710 **/
711 int
raptor_parse_chunk(raptor_parser * rdf_parser,const unsigned char * buffer,size_t len,int is_end)712 raptor_parse_chunk(raptor_parser* rdf_parser,
713 const unsigned char *buffer, size_t len, int is_end)
714 {
715 if(rdf_parser->sb)
716 raptor_stringbuffer_append_counted_string(rdf_parser->sb, buffer, len, 1);
717
718 return rdf_parser->factory->chunk(rdf_parser, buffer, len, is_end);
719 }
720
721
722 /**
723 * raptor_free_parser:
724 * @parser: #raptor_parser object
725 *
726 * Destructor - destroy a raptor_parser object.
727 *
728 **/
729 void
raptor_free_parser(raptor_parser * rdf_parser)730 raptor_free_parser(raptor_parser* rdf_parser)
731 {
732 RAPTOR_ASSERT_OBJECT_POINTER_RETURN(rdf_parser, raptor_parser);
733
734 if(rdf_parser->factory)
735 rdf_parser->factory->terminate(rdf_parser);
736
737 if(rdf_parser->www)
738 raptor_www_free(rdf_parser->www);
739
740 if(rdf_parser->context)
741 RAPTOR_FREE(raptor_parser_context, rdf_parser->context);
742
743 if(rdf_parser->base_uri)
744 raptor_free_uri_v2(rdf_parser->world, rdf_parser->base_uri);
745
746 if(rdf_parser->default_generate_id_handler_prefix)
747 RAPTOR_FREE(cstring, rdf_parser->default_generate_id_handler_prefix);
748
749 if(rdf_parser->sb)
750 raptor_free_stringbuffer(rdf_parser->sb);
751
752 if(rdf_parser->cache_control)
753 RAPTOR_FREE(cstring, rdf_parser->cache_control);
754
755 if(rdf_parser->user_agent)
756 RAPTOR_FREE(cstring, rdf_parser->user_agent);
757
758 RAPTOR_FREE(raptor_parser, rdf_parser);
759 }
760
761
762 /* Size of XML buffer to use when reading from a file */
763 #define RAPTOR_READ_BUFFER_SIZE 4096
764
765
766 /**
767 * raptor_parse_file_stream:
768 * @rdf_parser: parser
769 * @stream: FILE* of RDF content
770 * @filename: filename of content or NULL if it has no name
771 * @base_uri: the base URI to use
772 *
773 * Parse RDF content from a FILE*.
774 *
775 * After draining the stream, fclose is not called on it internally.
776 *
777 * Return value: non 0 on failure
778 **/
779 int
raptor_parse_file_stream(raptor_parser * rdf_parser,FILE * stream,const char * filename,raptor_uri * base_uri)780 raptor_parse_file_stream(raptor_parser* rdf_parser,
781 FILE *stream, const char* filename,
782 raptor_uri *base_uri)
783 {
784 /* Read buffer */
785 unsigned char buffer[RAPTOR_READ_BUFFER_SIZE+1];
786 int rc=0;
787 raptor_locator *locator=&rdf_parser->locator;
788
789 if(!stream || !base_uri)
790 return 1;
791
792 locator->line= locator->column = -1;
793 locator->file= filename;
794
795 if(raptor_start_parse(rdf_parser, base_uri))
796 return 1;
797
798 while(!feof(stream)) {
799 int len=fread(buffer, 1, RAPTOR_READ_BUFFER_SIZE, stream);
800 int is_end=(len < RAPTOR_READ_BUFFER_SIZE);
801 buffer[len] = '\0';
802 rc=raptor_parse_chunk(rdf_parser, buffer, len, is_end);
803 if(rc || is_end)
804 break;
805 }
806
807 return (rc != 0);
808 }
809
810
811 /**
812 * raptor_parse_file:
813 * @rdf_parser: parser
814 * @uri: URI of RDF content or NULL to read from standard input
815 * @base_uri: the base URI to use (or NULL if the same)
816 *
817 * Parse RDF content at a file URI.
818 *
819 * If uri is NULL (source is stdin), then the base_uri is required.
820 *
821 * Return value: non 0 on failure
822 **/
823 int
raptor_parse_file(raptor_parser * rdf_parser,raptor_uri * uri,raptor_uri * base_uri)824 raptor_parse_file(raptor_parser* rdf_parser, raptor_uri *uri,
825 raptor_uri *base_uri)
826 {
827 int rc=0;
828 int free_base_uri=0;
829 const char *filename=NULL;
830 FILE *fh=NULL;
831 #if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H)
832 struct stat buf;
833 #endif
834
835 if(uri) {
836 filename=raptor_uri_uri_string_to_filename(raptor_uri_as_string_v2(rdf_parser->world, uri));
837 if(!filename)
838 return 1;
839
840 #if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H)
841 if(!stat(filename, &buf) && S_ISDIR(buf.st_mode)) {
842 raptor_parser_error(rdf_parser, "Cannot read from a directory '%s'",
843 filename);
844 goto cleanup;
845 }
846 #endif
847
848 fh = fopen(filename, "r");
849 if(!fh) {
850 raptor_parser_error(rdf_parser, "file '%s' open failed - %s",
851 filename, strerror(errno));
852 goto cleanup;
853 }
854 if(!base_uri) {
855 base_uri=raptor_uri_copy_v2(rdf_parser->world, uri);
856 free_base_uri=1;
857 }
858 } else {
859 if(!base_uri)
860 return 1;
861 fh=stdin;
862 }
863
864 rc=raptor_parse_file_stream(rdf_parser, fh, filename, base_uri);
865
866 cleanup:
867 if(uri) {
868 if(fh)
869 fclose(fh);
870 RAPTOR_FREE(cstring, (void*)filename);
871 }
872 if(free_base_uri)
873 raptor_free_uri_v2(rdf_parser->world, base_uri);
874
875 return rc;
876 }
877
878
879 void
raptor_parse_uri_write_bytes(raptor_www * www,void * userdata,const void * ptr,size_t size,size_t nmemb)880 raptor_parse_uri_write_bytes(raptor_www* www,
881 void *userdata, const void *ptr,
882 size_t size, size_t nmemb)
883 {
884 raptor_parse_bytes_context* rpbc=(raptor_parse_bytes_context*)userdata;
885 int len=size*nmemb;
886
887 if(!rpbc->started) {
888 raptor_uri* base_uri=rpbc->base_uri;
889
890 if(!base_uri) {
891 rpbc->final_uri=raptor_www_get_final_uri(www);
892 /* base URI after URI resolution is finally chosen */
893 base_uri = rpbc->final_uri ? rpbc->final_uri : www->uri;
894 }
895
896 if(raptor_start_parse(rpbc->rdf_parser, base_uri))
897 raptor_www_abort(www, "Parsing failed");
898 rpbc->started=1;
899 }
900
901 if(raptor_parse_chunk(rpbc->rdf_parser, (unsigned char*)ptr, len, 0))
902 raptor_www_abort(www, "Parsing failed");
903 }
904
905
906 static void
raptor_parse_uri_content_type_handler(raptor_www * www,void * userdata,const char * content_type)907 raptor_parse_uri_content_type_handler(raptor_www* www, void* userdata,
908 const char* content_type)
909 {
910 raptor_parser* rdf_parser=(raptor_parser*)userdata;
911 if(rdf_parser->factory->content_type_handler)
912 rdf_parser->factory->content_type_handler(rdf_parser, content_type);
913 }
914
915
916 int
raptor_parse_uri_no_net_filter(void * user_data,raptor_uri * uri)917 raptor_parse_uri_no_net_filter(void *user_data, raptor_uri* uri)
918 {
919 raptor_parser* rdf_parser=(raptor_parser*)user_data;
920 unsigned char* uri_string=raptor_uri_as_string_v2(rdf_parser->world, uri);
921
922 if(raptor_uri_uri_string_is_file_uri(uri_string))
923 return 0;
924
925 raptor_parser_error((raptor_parser*)user_data,
926 "Network fetch of URI '%s' denied", uri_string);
927 return 1;
928 }
929
930
931 /**
932 * raptor_parse_uri:
933 * @rdf_parser: parser
934 * @uri: URI of RDF content
935 * @base_uri: the base URI to use (or NULL if the same)
936 *
937 * Parse the RDF content at URI.
938 *
939 * Sends an HTTP Accept: header whent the URI is of the HTTP protocol,
940 * see raptor_parse_uri_with_connection() for details including
941 * how the @base_uri is used.
942 *
943 * Return value: non 0 on failure
944 **/
945 int
raptor_parse_uri(raptor_parser * rdf_parser,raptor_uri * uri,raptor_uri * base_uri)946 raptor_parse_uri(raptor_parser* rdf_parser, raptor_uri *uri,
947 raptor_uri *base_uri)
948 {
949 return raptor_parse_uri_with_connection(rdf_parser, uri, base_uri, NULL);
950 }
951
952
953 /**
954 * raptor_parse_uri_with_connection:
955 * @rdf_parser: parser
956 * @uri: URI of RDF content
957 * @base_uri: the base URI to use (or NULL if the same)
958 * @connection: connection object pointer or NULL to create a new one
959 *
960 * Parse RDF content at URI using existing WWW connection.
961 *
962 * If @base_uri is not given and during resolution of the URI, a
963 * protocol redirection occurs, the final resolved URI will be
964 * used as the base URI. If redirection does not occur, the
965 * base URI will be @uri.
966 *
967 * If @base_uri is given, it overrides the process above.
968 *
969 * When @connection is NULL and a MIME Type exists for the parser
970 * type - such as returned by raptor_get_mime_type(parser) - this
971 * type is sent in an HTTP Accept: header in the form
972 * Accept: MIME-TYPE along with a wildcard of 0.1 quality, so MIME-TYPE is
973 * prefered rather than the sole answer. The latter part may not be
974 * necessary but should ensure an HTTP 200 response.
975 *
976 * Return value: non 0 on failure
977 **/
978 int
raptor_parse_uri_with_connection(raptor_parser * rdf_parser,raptor_uri * uri,raptor_uri * base_uri,void * connection)979 raptor_parse_uri_with_connection(raptor_parser* rdf_parser, raptor_uri *uri,
980 raptor_uri *base_uri, void *connection)
981 {
982 int ret=0;
983 raptor_parse_bytes_context rpbc;
984
985 if(connection) {
986 if(rdf_parser->www)
987 raptor_www_free(rdf_parser->www);
988 rdf_parser->www=raptor_www_new_with_connection_v2(rdf_parser->world, connection);
989 if(!rdf_parser->www)
990 return 1;
991 } else {
992 const char *accept_h;
993
994 if(rdf_parser->www)
995 raptor_www_free(rdf_parser->www);
996 rdf_parser->www=raptor_www_new_v2(rdf_parser->world);
997 if(!rdf_parser->www)
998 return 1;
999
1000 accept_h=raptor_parser_get_accept_header(rdf_parser);
1001 if(accept_h) {
1002 raptor_www_set_http_accept(rdf_parser->www, accept_h);
1003 RAPTOR_FREE(cstring, accept_h);
1004 }
1005 }
1006
1007 rpbc.rdf_parser=rdf_parser;
1008 rpbc.base_uri=base_uri;
1009 rpbc.final_uri=NULL;
1010 rpbc.started=0;
1011
1012 if(rdf_parser->uri_filter)
1013 raptor_www_set_uri_filter(rdf_parser->www, rdf_parser->uri_filter,
1014 rdf_parser->uri_filter_user_data);
1015 else if(rdf_parser->features[RAPTOR_FEATURE_NO_NET])
1016 raptor_www_set_uri_filter(rdf_parser->www, raptor_parse_uri_no_net_filter, rdf_parser);
1017
1018 raptor_www_set_error_handler(rdf_parser->www,
1019 rdf_parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].handler,
1020 rdf_parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].user_data);
1021 raptor_www_set_write_bytes_handler(rdf_parser->www, raptor_parse_uri_write_bytes,
1022 &rpbc);
1023
1024 raptor_www_set_content_type_handler(rdf_parser->www,
1025 raptor_parse_uri_content_type_handler,
1026 rdf_parser);
1027
1028 raptor_www_set_http_cache_control(rdf_parser->www, rdf_parser->cache_control);
1029
1030 if(rdf_parser->user_agent)
1031 raptor_www_set_user_agent(rdf_parser->www, rdf_parser->user_agent);
1032
1033 ret=raptor_www_fetch(rdf_parser->www, uri);
1034
1035 if(!rpbc.started && !ret)
1036 ret=raptor_start_parse(rdf_parser, base_uri);
1037
1038 if(rpbc.final_uri)
1039 raptor_free_uri_v2(rdf_parser->world, rpbc.final_uri);
1040
1041 if(ret) {
1042 raptor_www_free(rdf_parser->www);
1043 rdf_parser->www=NULL;
1044 return 1;
1045 }
1046
1047 if(raptor_parse_chunk(rdf_parser, NULL, 0, 1))
1048 rdf_parser->failed=1;
1049
1050 raptor_www_free(rdf_parser->www);
1051 rdf_parser->www=NULL;
1052
1053 return rdf_parser->failed;
1054 }
1055
1056
1057 /*
1058 * raptor_parser_fatal_error - Fatal Error from a parser - Internal
1059 */
1060 void
raptor_parser_fatal_error(raptor_parser * parser,const char * message,...)1061 raptor_parser_fatal_error(raptor_parser* parser, const char *message, ...)
1062 {
1063 va_list arguments;
1064
1065 parser->failed=1;
1066
1067 va_start(arguments, message);
1068 if(parser)
1069 raptor_log_error_varargs(parser->world,
1070 RAPTOR_LOG_LEVEL_FATAL,
1071 parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_FATAL].handler,
1072 parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_FATAL].user_data,
1073 &parser->locator,
1074 message, arguments);
1075 else
1076 raptor_log_error_varargs(NULL,
1077 RAPTOR_LOG_LEVEL_FATAL, NULL, NULL, NULL,
1078 message, arguments);
1079 va_end(arguments);
1080 }
1081
1082
1083 /*
1084 * raptor_parser_error - Error from a parser - Internal
1085 */
1086 void
raptor_parser_error(raptor_parser * parser,const char * message,...)1087 raptor_parser_error(raptor_parser* parser, const char *message, ...)
1088 {
1089 va_list arguments;
1090
1091 va_start(arguments, message);
1092
1093 raptor_parser_error_varargs(parser, message, arguments);
1094
1095 va_end(arguments);
1096 }
1097
1098
1099 /*
1100 * raptor_parser_simple_error - Error from a parser - Internal
1101 *
1102 * Matches the raptor_simple_message_handler API but same as
1103 * raptor_parser_error
1104 */
1105 void
raptor_parser_simple_error(void * user_data,const char * message,...)1106 raptor_parser_simple_error(void* user_data, const char *message, ...)
1107 {
1108 raptor_parser* parser=(raptor_parser*)user_data;
1109 va_list arguments;
1110
1111 va_start(arguments, message);
1112
1113 if(parser)
1114 raptor_log_error_varargs(parser->world,
1115 RAPTOR_LOG_LEVEL_ERROR,
1116 parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].handler,
1117 parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].user_data,
1118 &parser->locator,
1119 message, arguments);
1120 else
1121 raptor_log_error_varargs(NULL,
1122 RAPTOR_LOG_LEVEL_ERROR,
1123 NULL, NULL, NULL,
1124 message, arguments);
1125
1126 va_end(arguments);
1127 }
1128
1129
1130 /**
1131 * raptor_parser_error_varargs:
1132 * @parser: parser
1133 * @message: error format message
1134 * @arguments: varargs for message
1135 *
1136 * Error from a parser - Internal.
1137 */
1138 void
raptor_parser_error_varargs(raptor_parser * parser,const char * message,va_list arguments)1139 raptor_parser_error_varargs(raptor_parser* parser, const char *message,
1140 va_list arguments)
1141 {
1142 if(parser)
1143 raptor_log_error_varargs(parser->world,
1144 RAPTOR_LOG_LEVEL_ERROR,
1145 parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].handler,
1146 parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].user_data,
1147 &parser->locator,
1148 message, arguments);
1149 else
1150 raptor_log_error_varargs(NULL,
1151 RAPTOR_LOG_LEVEL_ERROR,
1152 NULL, NULL, NULL,
1153 message, arguments);
1154 }
1155
1156
1157 /*
1158 * raptor_parser_warning - Warning from a parser - Internal
1159 */
1160 void
raptor_parser_warning(raptor_parser * parser,const char * message,...)1161 raptor_parser_warning(raptor_parser* parser, const char *message, ...)
1162 {
1163 va_list arguments;
1164
1165 va_start(arguments, message);
1166
1167 if(parser)
1168 raptor_log_error_varargs(parser->world,
1169 RAPTOR_LOG_LEVEL_WARNING,
1170 parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_WARNING].handler,
1171 parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_WARNING].user_data,
1172 &parser->locator,
1173 message, arguments);
1174 else
1175 raptor_log_error_varargs(NULL,
1176 RAPTOR_LOG_LEVEL_WARNING,
1177 NULL, NULL, NULL,
1178 message, arguments);
1179
1180 va_end(arguments);
1181 }
1182
1183
1184
1185 /* PUBLIC FUNCTIONS */
1186
1187 /**
1188 * raptor_set_fatal_error_handler:
1189 * @parser: the parser
1190 * @user_data: user data to pass to function
1191 * @handler: pointer to the function
1192 *
1193 * Set the parser error handling function.
1194 *
1195 * The function will receive callbacks when the parser fails.
1196 *
1197 **/
1198 void
raptor_set_fatal_error_handler(raptor_parser * parser,void * user_data,raptor_message_handler handler)1199 raptor_set_fatal_error_handler(raptor_parser* parser, void *user_data,
1200 raptor_message_handler handler)
1201 {
1202 parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_FATAL].user_data=user_data;
1203 parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_FATAL].handler=handler;
1204 }
1205
1206
1207 /**
1208 * raptor_set_error_handler:
1209 * @parser: the parser
1210 * @user_data: user data to pass to function
1211 * @handler: pointer to the function
1212 *
1213 * Set the parser error handling function.
1214 *
1215 * The function will receive callbacks when the parser fails.
1216 *
1217 **/
1218 void
raptor_set_error_handler(raptor_parser * parser,void * user_data,raptor_message_handler handler)1219 raptor_set_error_handler(raptor_parser* parser, void *user_data,
1220 raptor_message_handler handler)
1221 {
1222 parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].user_data=user_data;
1223 parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_ERROR].handler=handler;
1224 }
1225
1226
1227 /**
1228 * raptor_set_warning_handler:
1229 * @parser: the parser
1230 * @user_data: user data to pass to function
1231 * @handler: pointer to the function
1232 *
1233 * Set the parser warning handling function.
1234 *
1235 * The function will receive callbacks when the parser gives a warning.
1236 *
1237 **/
1238 void
raptor_set_warning_handler(raptor_parser * parser,void * user_data,raptor_message_handler handler)1239 raptor_set_warning_handler(raptor_parser* parser, void *user_data,
1240 raptor_message_handler handler)
1241 {
1242 parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_WARNING].user_data=user_data;
1243 parser->error_handlers.handlers[RAPTOR_LOG_LEVEL_WARNING].handler=handler;
1244 }
1245
1246
1247 /**
1248 * raptor_set_statement_handler:
1249 * @parser: #raptor_parser parser object
1250 * @user_data: user data pointer for callback
1251 * @handler: new statement callback function
1252 *
1253 * Set the statement handler function for the parser.
1254 *
1255 **/
1256 void
raptor_set_statement_handler(raptor_parser * parser,void * user_data,raptor_statement_handler handler)1257 raptor_set_statement_handler(raptor_parser* parser,
1258 void *user_data,
1259 raptor_statement_handler handler)
1260 {
1261 parser->user_data=user_data;
1262 parser->statement_handler=handler;
1263 }
1264
1265
1266 /**
1267 * raptor_set_graph_handler:
1268 * @parser: #raptor_parser parser object
1269 * @user_data: user data pointer for callback
1270 * @handler: new graph callback function
1271 *
1272 * Set the graph handler function for the parser.
1273 *
1274 **/
1275 void
raptor_set_graph_handler(raptor_parser * parser,void * user_data,raptor_graph_handler handler)1276 raptor_set_graph_handler(raptor_parser* parser,
1277 void *user_data,
1278 raptor_graph_handler handler)
1279 {
1280 parser->user_data=user_data;
1281 parser->graph_handler=handler;
1282 }
1283
1284
1285 /**
1286 * raptor_set_generate_id_handler:
1287 * @parser: #raptor_parser parser object
1288 * @user_data: user data pointer for callback
1289 * @handler: generate ID callback function
1290 *
1291 * Set the generate ID handler function for the parser.
1292 *
1293 * Sets the function to generate IDs for the parser. The handler is
1294 * called with the @user_data parameter and an ID type of either
1295 * RAPTOR_GENID_TYPE_BNODEID or RAPTOR_GENID_TYPE_BAGID (latter is deprecated).
1296 *
1297 * The final argument of the callback method is user_bnodeid, the value of
1298 * the rdf:nodeID attribute that the user provided if any (or NULL).
1299 * It can either be returned directly as the generated value when present or
1300 * modified. The passed in value must be free()d if it is not used.
1301 *
1302 * If handler is NULL, the default method is used
1303 *
1304 **/
1305 void
raptor_set_generate_id_handler(raptor_parser * parser,void * user_data,raptor_generate_id_handler handler)1306 raptor_set_generate_id_handler(raptor_parser* parser,
1307 void *user_data,
1308 raptor_generate_id_handler handler)
1309 {
1310 parser->generate_id_handler_user_data=user_data;
1311 parser->generate_id_handler=handler;
1312 }
1313
1314
1315 /**
1316 * raptor_set_namespace_handler:
1317 * @parser: #raptor_parser parser object
1318 * @user_data: user data pointer for callback
1319 * @handler: new namespace callback function
1320 *
1321 * Set the namespace handler function for the parser.
1322 *
1323 * When a prefix/namespace is seen in a parser, call the given
1324 * @handler with the prefix string and the #raptor_uri namespace URI.
1325 * Either can be NULL for the default prefix or default namespace.
1326 *
1327 * The handler function does not deal with duplicates so any
1328 * namespace may be declared multiple times.
1329 *
1330 **/
1331 void
raptor_set_namespace_handler(raptor_parser * parser,void * user_data,raptor_namespace_handler handler)1332 raptor_set_namespace_handler(raptor_parser* parser,
1333 void *user_data,
1334 raptor_namespace_handler handler)
1335 {
1336 parser->namespace_handler=handler;
1337 parser->namespace_handler_user_data=user_data;
1338 }
1339
1340
1341 /**
1342 * raptor_parser_set_uri_filter:
1343 * @parser: parser object
1344 * @filter: URI filter function
1345 * @user_data: User data to pass to filter function
1346 *
1347 * Set URI filter function for WWW retrieval.
1348 **/
1349 void
raptor_parser_set_uri_filter(raptor_parser * parser,raptor_uri_filter_func filter,void * user_data)1350 raptor_parser_set_uri_filter(raptor_parser* parser,
1351 raptor_uri_filter_func filter,
1352 void *user_data)
1353 {
1354 parser->uri_filter=filter;
1355 parser->uri_filter_user_data=user_data;
1356 }
1357
1358
1359 #ifndef RAPTOR_DISABLE_V1
1360 /**
1361 * raptor_features_enumerate:
1362 * @feature: feature enumeration (0+)
1363 * @name: pointer to store feature short name (or NULL)
1364 * @uri: pointer to store feature URI (or NULL)
1365 * @label: pointer to feature label (or NULL)
1366 *
1367 * Get list of syntax features.
1368 *
1369 * If uri is not NULL, a pointer to a new raptor_uri is returned
1370 * that must be freed by the caller with raptor_free_uri().
1371 *
1372 * raptor_init() MUST have been called before calling this function.
1373 * Use raptor_features_enumerate_v2() if using raptor_world APIs.
1374 *
1375 * Return value: 0 on success, <0 on failure, >0 if feature is unknown
1376 **/
1377 int
raptor_features_enumerate(const raptor_feature feature,const char ** name,raptor_uri ** uri,const char ** label)1378 raptor_features_enumerate(const raptor_feature feature,
1379 const char **name,
1380 raptor_uri **uri, const char **label)
1381 {
1382 return raptor_features_enumerate_v2(raptor_world_instance(),
1383 feature, name, uri, label);
1384 }
1385 #endif
1386
1387
1388 /**
1389 * raptor_features_enumerate_v2:
1390 * @world: raptor_world object
1391 * @feature: feature enumeration (0+)
1392 * @name: pointer to store feature short name (or NULL)
1393 * @uri: pointer to store feature URI (or NULL)
1394 * @label: pointer to feature label (or NULL)
1395 *
1396 * Get list of syntax features.
1397 *
1398 * If uri is not NULL, a pointer to a new raptor_uri is returned
1399 * that must be freed by the caller with raptor_free_uri_v2().
1400 *
1401 * Return value: 0 on success, <0 on failure, >0 if feature is unknown
1402 **/
1403 int
raptor_features_enumerate_v2(raptor_world * world,const raptor_feature feature,const char ** name,raptor_uri ** uri,const char ** label)1404 raptor_features_enumerate_v2(raptor_world* world,
1405 const raptor_feature feature,
1406 const char **name,
1407 raptor_uri **uri, const char **label)
1408 {
1409 return raptor_features_enumerate_common(world, feature, name, uri, label, 1);
1410 }
1411
1412
1413 /**
1414 * raptor_set_feature:
1415 * @parser: #raptor_parser parser object
1416 * @feature: feature to set from enumerated #raptor_feature values
1417 * @value: integer feature value (0 or larger)
1418 *
1419 * Set various parser features.
1420 *
1421 * The allowed features are available via raptor_features_enumerate().
1422 *
1423 * Return value: non 0 on failure or if the feature is unknown
1424 **/
1425 int
raptor_set_feature(raptor_parser * parser,raptor_feature feature,int value)1426 raptor_set_feature(raptor_parser *parser, raptor_feature feature, int value)
1427 {
1428 if(value < 0)
1429 return -1;
1430
1431 switch(feature) {
1432 case RAPTOR_FEATURE_SCANNING:
1433 case RAPTOR_FEATURE_ALLOW_NON_NS_ATTRIBUTES:
1434 case RAPTOR_FEATURE_ALLOW_OTHER_PARSETYPES:
1435 case RAPTOR_FEATURE_ALLOW_BAGID:
1436 case RAPTOR_FEATURE_ALLOW_RDF_TYPE_RDF_LIST:
1437 case RAPTOR_FEATURE_NORMALIZE_LANGUAGE:
1438 case RAPTOR_FEATURE_NON_NFC_FATAL:
1439 case RAPTOR_FEATURE_WARN_OTHER_PARSETYPES:
1440 case RAPTOR_FEATURE_CHECK_RDF_ID:
1441 case RAPTOR_FEATURE_NO_NET:
1442 case RAPTOR_FEATURE_HTML_TAG_SOUP:
1443 case RAPTOR_FEATURE_MICROFORMATS:
1444 case RAPTOR_FEATURE_HTML_LINK:
1445 case RAPTOR_FEATURE_WWW_TIMEOUT:
1446 case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES:
1447 parser->features[(int)feature]=value;
1448 break;
1449
1450 case RAPTOR_FEATURE_ASSUME_IS_RDF:
1451 break;
1452
1453
1454 case RAPTOR_FEATURE_WRITE_BASE_URI:
1455 case RAPTOR_FEATURE_RELATIVE_URIS:
1456 case RAPTOR_FEATURE_START_URI:
1457 case RAPTOR_FEATURE_WRITER_AUTO_INDENT:
1458 case RAPTOR_FEATURE_WRITER_AUTO_EMPTY:
1459 case RAPTOR_FEATURE_WRITER_INDENT_WIDTH:
1460 case RAPTOR_FEATURE_WRITER_XML_VERSION:
1461 case RAPTOR_FEATURE_WRITER_XML_DECLARATION:
1462
1463 case RAPTOR_FEATURE_RESOURCE_BORDER:
1464 case RAPTOR_FEATURE_LITERAL_BORDER:
1465 case RAPTOR_FEATURE_BNODE_BORDER:
1466 case RAPTOR_FEATURE_RESOURCE_FILL:
1467 case RAPTOR_FEATURE_LITERAL_FILL:
1468 case RAPTOR_FEATURE_BNODE_FILL:
1469
1470 case RAPTOR_FEATURE_JSON_CALLBACK:
1471 case RAPTOR_FEATURE_JSON_EXTRA_DATA:
1472 case RAPTOR_FEATURE_RSS_TRIPLES:
1473 case RAPTOR_FEATURE_ATOM_ENTRY_URI:
1474 case RAPTOR_FEATURE_PREFIX_ELEMENTS:
1475
1476 case RAPTOR_FEATURE_WWW_HTTP_CACHE_CONTROL:
1477 case RAPTOR_FEATURE_WWW_HTTP_USER_AGENT:
1478 default:
1479 return -1;
1480 break;
1481 }
1482
1483 return 0;
1484 }
1485
1486
1487 /**
1488 * raptor_parser_set_feature_string:
1489 * @parser: #raptor_parser parser object
1490 * @feature: feature to set from enumerated #raptor_feature values
1491 * @value: feature value
1492 *
1493 * Set parser features with string values.
1494 *
1495 * The allowed features are available via raptor_features_enumerate().
1496 * If the feature type is integer, the value is interpreted as an integer.
1497 *
1498 * Return value: non 0 on failure or if the feature is unknown
1499 **/
1500 int
raptor_parser_set_feature_string(raptor_parser * parser,raptor_feature feature,const unsigned char * value)1501 raptor_parser_set_feature_string(raptor_parser *parser,
1502 raptor_feature feature,
1503 const unsigned char *value)
1504 {
1505 int value_is_string=(raptor_feature_value_type(feature) == 1);
1506 if(!value_is_string)
1507 return raptor_set_feature(parser, feature, atoi((const char*)value));
1508
1509 if((feature == RAPTOR_FEATURE_WWW_HTTP_CACHE_CONTROL) ||
1510 (feature == RAPTOR_FEATURE_WWW_HTTP_USER_AGENT)) {
1511 char *value_copy;
1512 size_t len=0;
1513 if(value)
1514 len=strlen((const char*)value);
1515 value_copy=(char*)RAPTOR_MALLOC(cstring, len+1);
1516 if(!value_copy)
1517 return 1;
1518
1519 if(len)
1520 strncpy(value_copy, (const char*)value, len);
1521 value_copy[len]='\0';
1522
1523 if(feature == RAPTOR_FEATURE_WWW_HTTP_CACHE_CONTROL)
1524 parser->cache_control=value_copy;
1525 else
1526 parser->user_agent=value_copy;
1527
1528 return 0;
1529 }
1530
1531 return -1;
1532 }
1533
1534
1535 /**
1536 * raptor_get_feature:
1537 * @parser: #raptor_parser parser object
1538 * @feature: feature to get value
1539 *
1540 * Get various parser features.
1541 *
1542 * The allowed features are available via raptor_features_enumerate().
1543 *
1544 * Note: no feature value is negative
1545 *
1546 * Return value: feature value or < 0 for an illegal feature
1547 **/
1548 int
raptor_get_feature(raptor_parser * parser,raptor_feature feature)1549 raptor_get_feature(raptor_parser *parser, raptor_feature feature)
1550 {
1551 int result= -1;
1552
1553 switch(feature) {
1554 case RAPTOR_FEATURE_SCANNING:
1555 case RAPTOR_FEATURE_ALLOW_NON_NS_ATTRIBUTES:
1556 case RAPTOR_FEATURE_ALLOW_OTHER_PARSETYPES:
1557 case RAPTOR_FEATURE_ALLOW_BAGID:
1558 case RAPTOR_FEATURE_ALLOW_RDF_TYPE_RDF_LIST:
1559 case RAPTOR_FEATURE_NORMALIZE_LANGUAGE:
1560 case RAPTOR_FEATURE_NON_NFC_FATAL:
1561 case RAPTOR_FEATURE_WARN_OTHER_PARSETYPES:
1562 case RAPTOR_FEATURE_CHECK_RDF_ID:
1563 case RAPTOR_FEATURE_NO_NET:
1564 case RAPTOR_FEATURE_HTML_TAG_SOUP:
1565 case RAPTOR_FEATURE_MICROFORMATS:
1566 case RAPTOR_FEATURE_HTML_LINK:
1567 case RAPTOR_FEATURE_WWW_TIMEOUT:
1568 case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES:
1569 result = parser->features[(int)feature];
1570 break;
1571
1572 case RAPTOR_FEATURE_ASSUME_IS_RDF:
1573 result=0;
1574 break;
1575
1576 /* serializing features */
1577 case RAPTOR_FEATURE_WRITE_BASE_URI:
1578 case RAPTOR_FEATURE_RELATIVE_URIS:
1579 case RAPTOR_FEATURE_START_URI:
1580 case RAPTOR_FEATURE_RESOURCE_BORDER:
1581 case RAPTOR_FEATURE_LITERAL_BORDER:
1582 case RAPTOR_FEATURE_BNODE_BORDER:
1583 case RAPTOR_FEATURE_RESOURCE_FILL:
1584 case RAPTOR_FEATURE_LITERAL_FILL:
1585 case RAPTOR_FEATURE_BNODE_FILL:
1586 case RAPTOR_FEATURE_JSON_CALLBACK:
1587 case RAPTOR_FEATURE_JSON_EXTRA_DATA:
1588 case RAPTOR_FEATURE_RSS_TRIPLES:
1589 case RAPTOR_FEATURE_ATOM_ENTRY_URI:
1590 case RAPTOR_FEATURE_PREFIX_ELEMENTS:
1591
1592 /* XML writer features */
1593 case RAPTOR_FEATURE_WRITER_AUTO_INDENT:
1594 case RAPTOR_FEATURE_WRITER_AUTO_EMPTY:
1595 case RAPTOR_FEATURE_WRITER_INDENT_WIDTH:
1596 case RAPTOR_FEATURE_WRITER_XML_VERSION:
1597 case RAPTOR_FEATURE_WRITER_XML_DECLARATION:
1598
1599 /* WWW features */
1600 case RAPTOR_FEATURE_WWW_HTTP_CACHE_CONTROL:
1601 case RAPTOR_FEATURE_WWW_HTTP_USER_AGENT:
1602
1603 default:
1604 break;
1605 }
1606
1607 return result;
1608 }
1609
1610
1611 /**
1612 * raptor_parser_get_feature_string:
1613 * @parser: #raptor_parser parser object
1614 * @feature: feature to get value
1615 *
1616 * Get parser features with string values.
1617 *
1618 * The allowed features are available via raptor_features_enumerate().
1619 * If a string is returned, it must be freed by the caller.
1620 *
1621 * Return value: feature value or NULL for an illegal feature or no value
1622 **/
1623 const unsigned char *
raptor_parser_get_feature_string(raptor_parser * parser,raptor_feature feature)1624 raptor_parser_get_feature_string(raptor_parser *parser,
1625 raptor_feature feature)
1626 {
1627 int value_is_string=(raptor_feature_value_type(feature) == 1);
1628 if(!value_is_string)
1629 return NULL;
1630
1631 return NULL;
1632 }
1633
1634
1635 /**
1636 * raptor_set_parser_strict:
1637 * @rdf_parser: #raptor_parser object
1638 * @is_strict: Non 0 for strict parsing
1639 *
1640 * Set parser to strict / lax mode.
1641 *
1642 **/
1643 void
raptor_set_parser_strict(raptor_parser * rdf_parser,int is_strict)1644 raptor_set_parser_strict(raptor_parser* rdf_parser, int is_strict)
1645 {
1646 is_strict=(is_strict) ? 1 : 0;
1647
1648 /* Initialise default parser mode */
1649 rdf_parser->features[RAPTOR_FEATURE_SCANNING]=0;
1650
1651 rdf_parser->features[RAPTOR_FEATURE_ALLOW_NON_NS_ATTRIBUTES]=!is_strict;
1652 rdf_parser->features[RAPTOR_FEATURE_ALLOW_OTHER_PARSETYPES]=!is_strict;
1653 rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID]=!is_strict;
1654 rdf_parser->features[RAPTOR_FEATURE_ALLOW_RDF_TYPE_RDF_LIST]=0;
1655 rdf_parser->features[RAPTOR_FEATURE_NORMALIZE_LANGUAGE]=1;
1656 rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL]=is_strict;
1657 rdf_parser->features[RAPTOR_FEATURE_WARN_OTHER_PARSETYPES]=!is_strict;
1658 rdf_parser->features[RAPTOR_FEATURE_CHECK_RDF_ID]=1;
1659 rdf_parser->features[RAPTOR_FEATURE_HTML_TAG_SOUP]=!is_strict;
1660 rdf_parser->features[RAPTOR_FEATURE_MICROFORMATS]=!is_strict;
1661 rdf_parser->features[RAPTOR_FEATURE_HTML_LINK]=!is_strict;
1662 }
1663
1664
1665 /**
1666 * raptor_set_default_generate_id_parameters:
1667 * @rdf_parser: #raptor_parser object
1668 * @prefix: prefix string
1669 * @base: integer base identifier
1670 *
1671 * Set default ID generation parameters.
1672 *
1673 * Sets the parameters for the default algorithm used to generate IDs.
1674 * The default algorithm uses both @prefix and @base to generate a new
1675 * identifier. The exact identifier generated is not guaranteed to
1676 * be a strict concatenation of @prefix and @base but will use both
1677 * parts. The @prefix parameter is copied to generate an ID.
1678 *
1679 * For finer control of the generated identifiers, use
1680 * raptor_set_default_generate_id_handler().
1681 *
1682 * If @prefix is NULL, the default prefix is used (currently "genid")
1683 * If @base is less than 1, it is initialised to 1.
1684 *
1685 **/
1686 void
raptor_set_default_generate_id_parameters(raptor_parser * rdf_parser,char * prefix,int base)1687 raptor_set_default_generate_id_parameters(raptor_parser* rdf_parser,
1688 char *prefix, int base)
1689 {
1690 char *prefix_copy=NULL;
1691 size_t length=0;
1692
1693 if(--base<0)
1694 base=0;
1695
1696 if(prefix) {
1697 length=strlen(prefix);
1698
1699 prefix_copy=(char*)RAPTOR_MALLOC(cstring, length+1);
1700 if(!prefix_copy)
1701 return;
1702 strcpy(prefix_copy, prefix);
1703 }
1704
1705 if(rdf_parser->default_generate_id_handler_prefix)
1706 RAPTOR_FREE(cstring, rdf_parser->default_generate_id_handler_prefix);
1707
1708 rdf_parser->default_generate_id_handler_prefix=prefix_copy;
1709 rdf_parser->default_generate_id_handler_prefix_length=length;
1710 rdf_parser->default_generate_id_handler_base=base;
1711 }
1712
1713
1714 /**
1715 * raptor_get_name:
1716 * @rdf_parser: #raptor_parser parser object
1717 *
1718 * Get the name of a parser.
1719 *
1720 * Return value: the short name for the parser.
1721 **/
1722 const char*
raptor_get_name(raptor_parser * rdf_parser)1723 raptor_get_name(raptor_parser *rdf_parser)
1724 {
1725 if(rdf_parser->factory->get_name)
1726 return rdf_parser->factory->get_name(rdf_parser);
1727 else
1728 return rdf_parser->factory->name;
1729 }
1730
1731
1732 /**
1733 * raptor_get_label:
1734 * @rdf_parser: #raptor_parser parser object
1735 *
1736 * Get a descriptive label of a parser.
1737 *
1738 * Return value: a readable label for the parser.
1739 **/
1740 const char*
raptor_get_label(raptor_parser * rdf_parser)1741 raptor_get_label(raptor_parser *rdf_parser)
1742 {
1743 return rdf_parser->factory->label;
1744 }
1745
1746
1747 /**
1748 * raptor_get_mime_type:
1749 * @rdf_parser: #raptor_parser parser object
1750 *
1751 * Return MIME type for the parser.
1752 *
1753 * Return value: MIME type or NULL if none available
1754 **/
1755 const char*
raptor_get_mime_type(raptor_parser * rdf_parser)1756 raptor_get_mime_type(raptor_parser *rdf_parser)
1757 {
1758 const char *mime_type=NULL;
1759 if(rdf_parser->factory->mime_types) {
1760 raptor_type_q* tq;
1761 tq=(raptor_type_q*)raptor_sequence_get_at(rdf_parser->factory->mime_types, 0);
1762 if(tq)
1763 mime_type=tq->mime_type;
1764 }
1765
1766 return mime_type;
1767 }
1768
1769
1770 /**
1771 * raptor_get_need_base_uri:
1772 * @rdf_parser: #raptor_parser parser object
1773 *
1774 * Get a boolean whether this parser needs a base URI to start parsing.
1775 *
1776 * Return value: non-0 if this parser needs a base URI
1777 **/
1778 int
raptor_get_need_base_uri(raptor_parser * rdf_parser)1779 raptor_get_need_base_uri(raptor_parser *rdf_parser)
1780 {
1781 return rdf_parser->factory->need_base_uri;
1782 }
1783
1784
1785 /**
1786 * raptor_parse_abort:
1787 * @rdf_parser: #raptor_parser parser object
1788 *
1789 * Abort an ongoing parse.
1790 *
1791 * Causes any ongoing generation of statements by a parser to be
1792 * terminated and the parser to return controlto the application
1793 * as soon as draining any existing buffers.
1794 *
1795 * Most useful inside raptor_parse_file or raptor_parse_uri when
1796 * the Raptor library is directing the parsing and when one of the
1797 * callback handlers such as as set by raptor_set_statement_handler
1798 * requires to return to the main application code.
1799 **/
1800 void
raptor_parse_abort(raptor_parser * rdf_parser)1801 raptor_parse_abort(raptor_parser *rdf_parser)
1802 {
1803 rdf_parser->failed=1;
1804 }
1805
1806
1807 static unsigned char*
raptor_default_generate_id_handler(void * user_data,raptor_genid_type type,unsigned char * user_bnodeid)1808 raptor_default_generate_id_handler(void *user_data, raptor_genid_type type,
1809 unsigned char *user_bnodeid)
1810 {
1811 raptor_parser *rdf_parser=(raptor_parser *)user_data;
1812 int id;
1813 unsigned char *buffer;
1814 int length;
1815 int tmpid;
1816
1817 if(user_bnodeid)
1818 return user_bnodeid;
1819
1820 id=++rdf_parser->default_generate_id_handler_base;
1821
1822 tmpid=id;
1823 length=2; /* min length 1 + \0 */
1824 while(tmpid/=10)
1825 length++;
1826
1827 if(rdf_parser->default_generate_id_handler_prefix)
1828 length += rdf_parser->default_generate_id_handler_prefix_length;
1829 else
1830 length += 5; /* genid */
1831
1832 buffer=(unsigned char*)RAPTOR_MALLOC(cstring, length);
1833 if(!buffer)
1834 return NULL;
1835 if(rdf_parser->default_generate_id_handler_prefix) {
1836 strncpy((char*)buffer, rdf_parser->default_generate_id_handler_prefix,
1837 rdf_parser->default_generate_id_handler_prefix_length);
1838 sprintf((char*)buffer+rdf_parser->default_generate_id_handler_prefix_length,
1839 "%d", id);
1840 } else
1841 sprintf((char*)buffer, "genid%d", id);
1842
1843 return buffer;
1844 }
1845
1846
1847 /**
1848 * raptor_parser_generate_id:
1849 * @rdf_parser: #raptor_parser parser object
1850 * @type: Type of ID to generate
1851 *
1852 * Generate an ID for a parser
1853 *
1854 * Type can be either RAPTOR_GENID_TYPE_BNODEID or
1855 * RAPTOR_GENID_TYPE_BAGID
1856 *
1857 * Return value: newly allocated generated ID or NULL on failure
1858 **/
1859 unsigned char*
raptor_parser_generate_id(raptor_parser * rdf_parser,raptor_genid_type type)1860 raptor_parser_generate_id(raptor_parser *rdf_parser, raptor_genid_type type)
1861 {
1862 if(type != RAPTOR_GENID_TYPE_BNODEID ||
1863 type != RAPTOR_GENID_TYPE_BAGID)
1864 return NULL;
1865
1866 return raptor_parser_internal_generate_id(rdf_parser, type, NULL);
1867 }
1868
1869
1870 unsigned char*
raptor_parser_internal_generate_id(raptor_parser * rdf_parser,raptor_genid_type type,unsigned char * user_bnodeid)1871 raptor_parser_internal_generate_id(raptor_parser *rdf_parser,
1872 raptor_genid_type type,
1873 unsigned char *user_bnodeid)
1874 {
1875 if(rdf_parser->generate_id_handler)
1876 return rdf_parser->generate_id_handler(rdf_parser->generate_id_handler_user_data,
1877 type, user_bnodeid);
1878 else
1879 return raptor_default_generate_id_handler(rdf_parser, type, user_bnodeid);
1880 }
1881
1882
1883 /**
1884 * raptor_get_locator:
1885 * @rdf_parser: raptor parser
1886 *
1887 * Get the current raptor locator object.
1888 *
1889 * Return value: raptor locator
1890 **/
1891 raptor_locator*
raptor_get_locator(raptor_parser * rdf_parser)1892 raptor_get_locator(raptor_parser *rdf_parser)
1893 {
1894 return &rdf_parser->locator;
1895 }
1896
1897
#ifdef RAPTOR_DEBUG
/*
 * raptor_stats_print - print parser statistics - debug builds only
 *
 * @rdf_parser: parser object
 * @stream: stream to write to
 *
 * Prints only when the RDF/XML parser is compiled in, RAPTOR_DEBUG is
 * greater than 1 and the parser's factory is named "rdfxml"; in every
 * other case nothing is printed.
 */
void
raptor_stats_print(raptor_parser *rdf_parser, FILE *stream)
{
#ifdef RAPTOR_PARSER_RDFXML
#if RAPTOR_DEBUG > 1
  if(strcmp(rdf_parser->factory->name, "rdfxml") == 0) {
    fputs("raptor parser stats\n ", stream);
    raptor_rdfxml_parser_stats_print((raptor_rdfxml_parser*)rdf_parser->context,
                                     stream);
  }
#endif
#endif
}
#endif
1913
1914
1915 struct syntax_score
1916 {
1917 int score;
1918 raptor_parser_factory* factory;
1919 };
1920
1921
1922 static int
compare_syntax_score(const void * a,const void * b)1923 compare_syntax_score(const void *a, const void *b) {
1924 return ((struct syntax_score*)b)->score - ((struct syntax_score*)a)->score;
1925 }
1926
1927
1928 #ifndef RAPTOR_DISABLE_V1
1929 /**
1930 * raptor_guess_parser_name:
1931 * @uri: URI identifying the syntax (or NULL)
1932 * @mime_type: mime type identifying the content (or NULL)
1933 * @buffer: buffer of content to guess (or NULL)
1934 * @len: length of buffer
1935 * @identifier: identifier of content (or NULL)
1936 *
1937 * Guess a parser name for content.
1938 *
1939 * Find a parser by scoring recognition of the syntax by a block of
1940 * characters, the content identifier or a mime type. The content
1941 * identifier is typically a filename or URI or some other identifier.
1942 *
1943 * raptor_init() MUST have been called before calling this function.
1944 * Use raptor_guess_parser_name_v2() if using raptor_world APIs.
1945 *
1946 * Return value: a parser name or NULL if no guess could be made
1947 **/
1948 const char*
raptor_guess_parser_name(raptor_uri * uri,const char * mime_type,const unsigned char * buffer,size_t len,const unsigned char * identifier)1949 raptor_guess_parser_name(raptor_uri *uri, const char *mime_type,
1950 const unsigned char *buffer, size_t len,
1951 const unsigned char *identifier)
1952 {
1953 return raptor_guess_parser_name_v2(raptor_world_instance(),
1954 uri, mime_type, buffer, len, identifier);
1955 }
1956 #endif
1957
1958
1959 /**
1960 * raptor_guess_parser_name_v2:
1961 * @world: raptor_world object
1962 * @uri: URI identifying the syntax (or NULL)
1963 * @mime_type: mime type identifying the content (or NULL)
1964 * @buffer: buffer of content to guess (or NULL)
1965 * @len: length of buffer
1966 * @identifier: identifier of content (or NULL)
1967 *
1968 * Guess a parser name for content.
1969 *
1970 * Find a parser by scoring recognition of the syntax by a block of
1971 * characters, the content identifier or a mime type. The content
1972 * identifier is typically a filename or URI or some other identifier.
1973 *
1974 * Return value: a parser name or NULL if no guess could be made
1975 **/
1976 const char*
raptor_guess_parser_name_v2(raptor_world * world,raptor_uri * uri,const char * mime_type,const unsigned char * buffer,size_t len,const unsigned char * identifier)1977 raptor_guess_parser_name_v2(raptor_world* world,
1978 raptor_uri *uri, const char *mime_type,
1979 const unsigned char *buffer, size_t len,
1980 const unsigned char *identifier)
1981 {
1982 unsigned int i;
1983 raptor_parser_factory *factory;
1984 unsigned char *suffix=NULL;
1985 /* FIXME - up to 10 parsers :) */
1986 #define MAX_PARSERS 10
1987 struct syntax_score scores[MAX_PARSERS];
1988
1989 if(identifier) {
1990 unsigned char *p=(unsigned char*)strrchr((const char*)identifier, '.');
1991 if(p) {
1992 unsigned char *from, *to;
1993 p++;
1994 suffix=(unsigned char*)RAPTOR_MALLOC(cstring, strlen((const char*)p)+1);
1995 if(!suffix)
1996 return NULL;
1997 for(from=p, to=suffix; *from; ) {
1998 unsigned char c=*from++;
1999 /* discard the suffix if it wasn't '\.[a-zA-Z0-9]+$' */
2000 if(!isalpha(c) && !isdigit(c)) {
2001 RAPTOR_FREE(cstring, suffix);
2002 suffix=NULL;
2003 to=NULL;
2004 break;
2005 }
2006 *to++=isupper((char)c) ? (unsigned char)tolower((char)c): c;
2007 }
2008 if(to)
2009 *to='\0';
2010 }
2011 }
2012
2013 for(i=0;
2014 (factory=(raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
2015 i++) {
2016 int score= -1;
2017 raptor_type_q* type_q=NULL;
2018
2019 if(mime_type && factory->mime_types) {
2020 int j;
2021 type_q=NULL;
2022 for(j=0;
2023 (type_q=(raptor_type_q*)raptor_sequence_get_at(factory->mime_types, j));
2024 j++) {
2025 if(!strcmp(mime_type, type_q->mime_type))
2026 break;
2027 }
2028 /* got an exact match mime type - score it via the Q */
2029 if(type_q)
2030 score=type_q->q;
2031 }
2032 /* mime type match has high Q - return result */
2033 if(score >= 10)
2034 break;
2035
2036 if(uri && factory->uri_string &&
2037 !strcmp((const char*)raptor_uri_as_string_v2(world, uri),
2038 (const char*)factory->uri_string))
2039 /* got an exact match syntax for URI - return result */
2040 break;
2041
2042 if(factory->recognise_syntax) {
2043 int c= -1;
2044
2045 /* Only use first N bytes to avoid HTML documents that contain
2046 * RDF/XML examples
2047 */
2048 #define FIRSTN 1024
2049 if(buffer && len && len > FIRSTN) {
2050 c=buffer[FIRSTN];
2051 ((char*)buffer)[FIRSTN]='\0';
2052 }
2053
2054 score += factory->recognise_syntax(factory, buffer, len,
2055 identifier, suffix,
2056 mime_type);
2057
2058 if(c >= 0)
2059 ((char*)buffer)[FIRSTN]=c;
2060 }
2061
2062 if(i > MAX_PARSERS) {
2063 RAPTOR_DEBUG2("Number of parsers greater than static buffer size %d\n",
2064 MAX_PARSERS);
2065 if(suffix)
2066 RAPTOR_FREE(cstring, suffix);
2067 return NULL;
2068 }
2069
2070 scores[i].score=score < 10 ? score : 10; scores[i].factory=factory;
2071 #if RAPTOR_DEBUG > 2
2072 RAPTOR_DEBUG3("Score %15s : %d\n", factory->name, score);
2073 #endif
2074 }
2075
2076 if(!factory) {
2077 /* sort the scores and pick a factory */
2078 qsort(scores, i, sizeof(struct syntax_score), compare_syntax_score);
2079 if(scores[0].score >= 0)
2080 factory=scores[0].factory;
2081 }
2082
2083 if(suffix)
2084 RAPTOR_FREE(cstring, suffix);
2085
2086 return factory ? factory->name : NULL;
2087 }
2088
2089
2090 /*
2091 * raptor_parser_copy_user_state:
2092 * @to_parser: destination parser
2093 * @from_parser: source parser
2094 *
2095 * Copy user state between parsers - INTERNAL.
2096 *
2097 * Return value: non-0 on failure
2098 **/
2099 int
raptor_parser_copy_user_state(raptor_parser * to_parser,raptor_parser * from_parser)2100 raptor_parser_copy_user_state(raptor_parser *to_parser,
2101 raptor_parser *from_parser)
2102 {
2103 int rc=0;
2104 int i;
2105
2106 to_parser->user_data= from_parser->user_data;
2107 memcpy(&to_parser->error_handlers, &from_parser->error_handlers,
2108 sizeof(raptor_error_handlers));
2109 to_parser->statement_handler= from_parser->statement_handler;
2110 to_parser->generate_id_handler_user_data= from_parser->generate_id_handler_user_data;
2111 to_parser->generate_id_handler= from_parser->generate_id_handler;
2112 to_parser->default_generate_id_handler_base= from_parser->default_generate_id_handler_base;
2113 /* copy over non-shared user state - generate ID prefix string */
2114 if(from_parser->default_generate_id_handler_prefix) {
2115 size_t len=from_parser->default_generate_id_handler_prefix_length;
2116 to_parser->default_generate_id_handler_prefix=(char*)RAPTOR_MALLOC(cstring, len+1);
2117 if(to_parser->default_generate_id_handler_prefix)
2118 strncpy((char*)to_parser->default_generate_id_handler_prefix,
2119 (const char*)from_parser->default_generate_id_handler_prefix,
2120 len+1);
2121 else
2122 rc=1;
2123 }
2124 to_parser->default_generate_id_handler_prefix_length= from_parser->default_generate_id_handler_prefix_length;
2125 to_parser->namespace_handler= from_parser->namespace_handler;
2126 to_parser->namespace_handler_user_data= from_parser->namespace_handler_user_data;
2127 to_parser->uri_filter= from_parser->uri_filter;
2128 to_parser->uri_filter_user_data= from_parser->uri_filter_user_data;
2129
2130 /* copy over Cache-Control: header */
2131 if(!rc && from_parser->cache_control) {
2132 size_t len=strlen(from_parser->cache_control);
2133 to_parser->cache_control=(char*)RAPTOR_MALLOC(cstring, len+1);
2134 if(to_parser->cache_control)
2135 strncpy((char*)to_parser->cache_control,
2136 (const char*)from_parser->cache_control,
2137 len+1);
2138 else
2139 rc=1;
2140 }
2141
2142 /* copy over User-Agent: header */
2143 if(!rc && from_parser->user_agent) {
2144 size_t len=strlen(from_parser->user_agent);
2145 to_parser->user_agent=(char*)RAPTOR_MALLOC(cstring, len+1);
2146 if(to_parser->user_agent)
2147 strncpy((char*)to_parser->user_agent,
2148 (const char*)from_parser->user_agent,
2149 len+1);
2150 else
2151 rc=1;
2152 }
2153
2154 /* copy features */
2155 for(i=0; i<= RAPTOR_FEATURE_LAST; i++)
2156 to_parser->features[i]= from_parser->features[i];
2157
2158 return rc;
2159 }
2160
2161
2162 /*
2163 * raptor_parser_start_namespace:
2164 * @rdf_parser: parser
2165 * @nspace: namespace starting
2166 *
2167 * Internal - Invoke start namespace handler
2168 **/
2169 void
raptor_parser_start_namespace(raptor_parser * rdf_parser,raptor_namespace * nspace)2170 raptor_parser_start_namespace(raptor_parser* rdf_parser,
2171 raptor_namespace* nspace)
2172 {
2173 if(!rdf_parser->namespace_handler)
2174 return;
2175
2176 (*rdf_parser->namespace_handler)(rdf_parser->namespace_handler_user_data,
2177 nspace);
2178 }
2179
2180
2181 /**
2182 * raptor_parser_get_accept_header:
2183 * @rdf_parser: parser
2184 *
2185 * Get an HTTP Accept value for the parser.
2186 *
2187 * The returned string must be freed by the caller such as with
2188 * raptor_free_memory().
2189 *
2190 * Return value: a new Accept: header string or NULL on failure
2191 **/
2192 const char*
raptor_parser_get_accept_header(raptor_parser * rdf_parser)2193 raptor_parser_get_accept_header(raptor_parser* rdf_parser)
2194 {
2195 raptor_parser_factory *factory=rdf_parser->factory;
2196 char *accept_header=NULL;
2197 size_t len;
2198 char *p;
2199 int i;
2200 raptor_type_q* type_q;
2201
2202 if(factory->accept_header)
2203 return factory->accept_header(rdf_parser);
2204
2205 if(!factory->mime_types)
2206 return NULL;
2207
2208 len=0;
2209 for(i=0;
2210 (type_q=(raptor_type_q*)raptor_sequence_get_at(factory->mime_types, i));
2211 i++) {
2212 if(type_q->mime_type) {
2213 len+= type_q->mime_type_len + 2; /* ", " */
2214 if(type_q->q < 10)
2215 len+= 6; /* ";q=X.Y" */
2216 }
2217 }
2218
2219 /* 9 = "\*\/\*;q=0.1" */
2220 accept_header=(char*)RAPTOR_MALLOC(cstring, len + 9 + 1);
2221 if(!accept_header)
2222 return NULL;
2223
2224 p=accept_header;
2225 for(i=0;
2226 (type_q=(raptor_type_q*)raptor_sequence_get_at(factory->mime_types, i));
2227 i++) {
2228 if(type_q->mime_type) {
2229 strncpy(p, type_q->mime_type, type_q->mime_type_len);
2230 p+= type_q->mime_type_len;
2231 if(type_q->q < 10) {
2232 *p++ = ';';
2233 *p++ = 'q';
2234 *p++ = '=';
2235 *p++ = '0';
2236 *p++ = '.';
2237 *p++ = '0' + (type_q->q);
2238 }
2239 }
2240
2241 *p++ = ',';
2242 *p++ = ' ';
2243 }
2244
2245 strncpy(p, "*/*;q=0.1", 10);
2246
2247 return accept_header;
2248 }
2249
2250
2251 const char*
raptor_parser_get_accept_header_all(raptor_world * world)2252 raptor_parser_get_accept_header_all(raptor_world* world)
2253 {
2254 raptor_parser_factory *factory;
2255 char *accept_header=NULL;
2256 size_t len;
2257 char *p;
2258 int i;
2259
2260 len=0;
2261 for(i=0;
2262 (factory=(raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
2263 i++) {
2264 raptor_type_q* type_q;
2265 int j;
2266
2267 for(j=0;
2268 (type_q=(raptor_type_q*)raptor_sequence_get_at(factory->mime_types, j));
2269 j++) {
2270 if(type_q->mime_type) {
2271 len+= type_q->mime_type_len + 2; /* ", " */
2272 if(type_q->q < 10)
2273 len+= 6; /* ";q=X.Y" */
2274 }
2275 }
2276 }
2277
2278 /* 9 = "\*\/\*;q=0.1" */
2279 accept_header=(char*)RAPTOR_MALLOC(cstring, len + 9 + 1);
2280 if(!accept_header)
2281 return NULL;
2282
2283 p=accept_header;
2284 for(i=0;
2285 (factory=(raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
2286 i++) {
2287 raptor_type_q* type_q;
2288 int j;
2289
2290 for(j=0;
2291 (type_q=(raptor_type_q*)raptor_sequence_get_at(factory->mime_types, j));
2292 j++) {
2293 if(type_q->mime_type) {
2294 strncpy(p, type_q->mime_type, type_q->mime_type_len);
2295 p+= type_q->mime_type_len;
2296 if(type_q->q < 10) {
2297 *p++ = ';';
2298 *p++ = 'q';
2299 *p++ = '=';
2300 *p++ = '0';
2301 *p++ = '.';
2302 *p++ = '0' + (type_q->q);
2303 }
2304 }
2305
2306 *p++ = ',';
2307 *p++ = ' ';
2308 }
2309
2310 }
2311
2312 strncpy(p, "*/*;q=0.1", 10);
2313
2314 return accept_header;
2315 }
2316
2317
2318 void
raptor_parser_save_content(raptor_parser * rdf_parser,int save)2319 raptor_parser_save_content(raptor_parser* rdf_parser, int save)
2320 {
2321 if(rdf_parser->sb)
2322 raptor_free_stringbuffer(rdf_parser->sb);
2323
2324 rdf_parser->sb= save ? raptor_new_stringbuffer() : NULL;
2325 }
2326
2327
2328 const unsigned char*
raptor_parser_get_content(raptor_parser * rdf_parser,size_t * length_p)2329 raptor_parser_get_content(raptor_parser* rdf_parser, size_t* length_p)
2330 {
2331 unsigned char* buffer;
2332 size_t len;
2333
2334 if(!rdf_parser->sb)
2335 return NULL;
2336
2337 len=raptor_stringbuffer_length(rdf_parser->sb);
2338 buffer=(unsigned char*)RAPTOR_MALLOC(cstring, len+1);
2339 if(!buffer)
2340 return NULL;
2341
2342 raptor_stringbuffer_copy_to_string(rdf_parser->sb, buffer, len);
2343
2344 if(length_p)
2345 *length_p=len;
2346
2347 return buffer;
2348 }
2349
2350
2351 void
raptor_parser_set_graph_name(raptor_parser * parser,raptor_uri * uri)2352 raptor_parser_set_graph_name(raptor_parser* parser, raptor_uri* uri)
2353 {
2354 if(parser->graph_handler)
2355 (*parser->graph_handler)(parser->user_data, uri);
2356 }
2357
2358
2359 int
raptor_parser_get_current_base_id(raptor_parser * parser)2360 raptor_parser_get_current_base_id(raptor_parser* parser)
2361 {
2362 if(parser->factory->get_current_base_id)
2363 return parser->factory->get_current_base_id(parser);
2364 else
2365 return parser->default_generate_id_handler_base;
2366 }
2367
2368
2369 /**
2370 * raptor_parser_get_world:
2371 * @rdf_parser: parser
2372 *
2373 * Get the #raptor_world object associated with a parser.
2374 *
2375 * Return value: raptor_world* pointer
2376 **/
2377 raptor_world *
raptor_parser_get_world(raptor_parser * rdf_parser)2378 raptor_parser_get_world(raptor_parser* rdf_parser)
2379 {
2380 return rdf_parser->world;
2381 }
2382
2383
2384 /* end not STANDALONE */
2385 #endif
2386
2387
2388 #ifdef STANDALONE
2389 #include <stdio.h>
2390
2391 int main(int argc, char *argv[]);
2392
2393
2394 int
main(int argc,char * argv[])2395 main(int argc, char *argv[])
2396 {
2397 raptor_world *world;
2398 #ifdef RAPTOR_DEBUG
2399 const char *program=raptor_basename(argv[0]);
2400 #endif
2401 int i;
2402 const char *s;
2403
2404 world = raptor_new_world();
2405 if(!world || raptor_world_open(world))
2406 exit(1);
2407
2408 #ifdef RAPTOR_DEBUG
2409 fprintf(stderr, "%s: Known features:\n", program);
2410 #endif
2411
2412 for(i=0; i <= RAPTOR_FEATURE_LAST; i++) {
2413 const char *feature_name;
2414 const char *feature_label;
2415 raptor_uri *feature_uri;
2416 int fn;
2417
2418 if(raptor_features_enumerate_v2(world, (raptor_feature)i,
2419 &feature_name, &feature_uri, &feature_label))
2420 continue;
2421
2422 #ifdef RAPTOR_DEBUG
2423 fprintf(stderr, " %2d %-20s %s\n", i, feature_name, feature_label);
2424 #endif
2425 fn=raptor_feature_from_uri_v2(world, feature_uri);
2426 if(fn != i) {
2427 fprintf(stderr, "raptor_feature_from_uri returned %d expected %d\n", fn, i);
2428 return 1;
2429 }
2430 raptor_free_uri_v2(world, feature_uri);
2431 }
2432
2433 s=raptor_parser_get_accept_header_all(world);
2434 fprintf(stderr, "Default HTTP accept header: '%s'\n", s);
2435 RAPTOR_FREE(cstring, s);
2436
2437 raptor_free_world(world);
2438
2439 return 0;
2440 }
2441
2442 #endif
2443