1 /* -*- Mode: c; c-basic-offset: 2 -*-
2 *
3 * raptor_librdfa.c - Raptor RDFA Parser via librdfa implementation
4 *
5 * Copyright (C) 2008, David Beckett http://www.dajobe.org/
6 *
7 * This package is Free Software and part of Redland http://librdf.org/
8 *
9 * It is licensed under the following three licenses as alternatives:
10 * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
11 * 2. GNU General Public License (GPL) V2 or any newer version
12 * 3. Apache License, V2.0 or any newer version
13 *
14 * You may not use this file except in compliance with at least one of
15 * the above three licenses.
16 *
17 * See LICENSE.html or LICENSE.txt at the top of this package for the
18 * complete terms and further detail along with the license texts for
19 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
20 *
21 *
22 */
23
24
25 #ifdef HAVE_CONFIG_H
26 #include <raptor_config.h>
27 #endif
28
29 #ifdef WIN32
30 #include <win32_raptor_config.h>
31 #endif
32
33 #include <stdio.h>
34 #include <string.h>
35 #include <ctype.h>
36 #include <stdarg.h>
37 #ifdef HAVE_ERRNO_H
38 #include <errno.h>
39 #endif
40 #ifdef HAVE_STDLIB_H
41 #include <stdlib.h>
42 #endif
43
44 /* Raptor includes */
45 #include "raptor.h"
46 #include "raptor_internal.h"
47
48 #include "rdfa.h"
49 #include "rdfa_utils.h"
50
51
52
53 /*
54 * RDFA parser object
55 */
56 struct raptor_librdfa_parser_context_s {
57 /* librdfa object */
58 rdfacontext* context;
59
60 /* static statement for use in passing to user code */
61 raptor_statement statement;
62 };
63
64
65 typedef struct raptor_librdfa_parser_context_s raptor_librdfa_parser_context;
66
67
68 static int
raptor_librdfa_parse_init(raptor_parser * rdf_parser,const char * name)69 raptor_librdfa_parse_init(raptor_parser* rdf_parser, const char *name)
70 {
71 /*raptor_librdfa_parser_context *librdfa_parser=(raptor_librdfa_parser_context*)rdf_parser->context; */
72 return 0;
73 }
74
75
76 static void
raptor_librdfa_parse_terminate(raptor_parser * rdf_parser)77 raptor_librdfa_parse_terminate(raptor_parser* rdf_parser)
78 {
79 raptor_librdfa_parser_context *librdfa_parser=(raptor_librdfa_parser_context*)rdf_parser->context;
80
81 if(librdfa_parser->context) {
82 rdfa_parse_end(librdfa_parser->context);
83 rdfa_free_context(librdfa_parser->context);
84 librdfa_parser->context=NULL;
85 }
86 }
87
88
89 static void
raptor_librdfa_generate_statement(rdftriple * triple,void * callback_data)90 raptor_librdfa_generate_statement(rdftriple* triple, void* callback_data)
91 {
92 raptor_parser* parser=(raptor_parser*)callback_data;
93 raptor_statement *s=&parser->statement;
94 raptor_uri *subject_uri=NULL;
95 raptor_uri *predicate_uri=NULL;
96 raptor_uri *object_uri=NULL;
97 raptor_uri *datatype_uri=NULL;
98
99 if(!triple->subject || !triple->predicate || !triple->object) {
100 RAPTOR_FATAL1("Triple has NULL parts\n");
101 rdfa_free_triple(triple);
102 return;
103 }
104
105 if(triple->object_type == RDF_TYPE_NAMESPACE_PREFIX) {
106 RAPTOR_FATAL1("Triple has namespace object type\n");
107 rdfa_free_triple(triple);
108 return;
109 }
110
111 if((triple->subject[0] == '_') && (triple->subject[1] == ':')) {
112 s->subject_type = RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
113 s->subject= (triple->subject + 2);
114 } else {
115 s->subject_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
116 subject_uri=raptor_new_uri_v2(parser->world, (const unsigned char*)triple->subject);
117 if(!subject_uri)
118 goto cleanup;
119 s->subject=subject_uri;
120 }
121
122
123 predicate_uri=raptor_new_uri_v2(parser->world, (const unsigned char*)triple->predicate);
124 if(!predicate_uri)
125 goto cleanup;
126 s->predicate=predicate_uri;
127 s->predicate_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
128
129 s->object = triple->object;
130 s->object_literal_datatype=NULL;
131 s->object_literal_language=NULL;
132 if(triple->object_type == RDF_TYPE_IRI) {
133 if((triple->object[0] == '_') && (triple->object[1] == ':')) {
134 s->object_type = RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
135 s->object = (triple->object + 2);
136 } else {
137 s->object_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
138 object_uri=raptor_new_uri_v2(parser->world, (const unsigned char*)triple->object);
139 if(!object_uri)
140 goto cleanup;
141 s->object=object_uri;
142 }
143 } else if(triple->object_type == RDF_TYPE_PLAIN_LITERAL) {
144 s->object_type=RAPTOR_IDENTIFIER_TYPE_LITERAL;
145 if(triple->language)
146 s->object_literal_language=(const unsigned char*)triple->language;
147 } else if(triple->object_type == RDF_TYPE_XML_LITERAL) {
148 s->object_type = RAPTOR_IDENTIFIER_TYPE_XML_LITERAL;
149 } else if(triple->object_type == RDF_TYPE_TYPED_LITERAL) {
150 s->object_type=RAPTOR_IDENTIFIER_TYPE_LITERAL;
151 if(triple->language)
152 s->object_literal_language=(const unsigned char*)triple->language;
153 if(triple->datatype) {
154 datatype_uri=raptor_new_uri_v2(parser->world, (const unsigned char*)triple->datatype);
155 if(!datatype_uri)
156 goto cleanup;
157 s->object_literal_datatype=datatype_uri;
158 /* If datatype, no language allowed */
159 s->object_literal_language=NULL;
160 }
161 } else {
162 RAPTOR_FATAL2("Triple has unknown object type %d\n", s->object_type);
163 goto cleanup;
164 }
165
166 if(!parser->statement_handler)
167 goto cleanup;
168
169 /* Generate triple */
170 (*parser->statement_handler)(parser->user_data, s);
171
172 cleanup:
173 rdfa_free_triple(triple);
174
175 if(subject_uri)
176 raptor_free_uri_v2(parser->world, subject_uri);
177 if(predicate_uri)
178 raptor_free_uri_v2(parser->world, predicate_uri);
179 if(object_uri)
180 raptor_free_uri_v2(parser->world, object_uri);
181 if(datatype_uri)
182 raptor_free_uri_v2(parser->world, datatype_uri);
183 }
184
185
186 static void
raptor_librdfa_sax2_new_namespace_handler(void * user_data,raptor_namespace * nspace)187 raptor_librdfa_sax2_new_namespace_handler(void *user_data,
188 raptor_namespace* nspace)
189 {
190 raptor_parser* rdf_parser;
191 rdf_parser=(raptor_parser*)user_data;
192 raptor_parser_start_namespace(rdf_parser, nspace);
193 }
194
195
196
197 static int
raptor_librdfa_parse_start(raptor_parser * rdf_parser)198 raptor_librdfa_parse_start(raptor_parser* rdf_parser)
199 {
200 raptor_locator *locator=&rdf_parser->locator;
201 raptor_librdfa_parser_context *librdfa_parser=(raptor_librdfa_parser_context*)rdf_parser->context;
202 int rc;
203 char* base_uri_string=NULL;
204
205 locator->line=1;
206 locator->column=0;
207 locator->byte=0;
208
209 if(rdf_parser->base_uri)
210 base_uri_string=(char*)raptor_uri_as_string_v2(rdf_parser->world, rdf_parser->base_uri);
211
212 if(librdfa_parser->context)
213 rdfa_free_context(librdfa_parser->context);
214 librdfa_parser->context=rdfa_create_context(base_uri_string);
215 if(!librdfa_parser->context)
216 return 1;
217
218 librdfa_parser->context->namespace_handler=raptor_librdfa_sax2_new_namespace_handler;
219 librdfa_parser->context->namespace_handler_user_data=rdf_parser;
220 librdfa_parser->context->error_handlers=&rdf_parser->error_handlers;
221
222 librdfa_parser->context->callback_data=rdf_parser;
223 rdfa_set_triple_handler(librdfa_parser->context,
224 raptor_librdfa_generate_statement);
225
226 rc = rdfa_parse_start(librdfa_parser->context);
227 if(rc != RDFA_PARSE_SUCCESS)
228 return 1;
229
230 return 0;
231 }
232
233
234 static int
raptor_librdfa_parse_chunk(raptor_parser * rdf_parser,const unsigned char * s,size_t len,int is_end)235 raptor_librdfa_parse_chunk(raptor_parser* rdf_parser,
236 const unsigned char *s, size_t len,
237 int is_end)
238 {
239 raptor_librdfa_parser_context *librdfa_parser=(raptor_librdfa_parser_context*)rdf_parser->context;
240 int rval=rdfa_parse_chunk(librdfa_parser->context, (char*)s, len, is_end);
241 return rval != RDFA_PARSE_SUCCESS;
242 }
243
244 static int
raptor_librdfa_parse_recognise_syntax(raptor_parser_factory * factory,const unsigned char * buffer,size_t len,const unsigned char * identifier,const unsigned char * suffix,const char * mime_type)245 raptor_librdfa_parse_recognise_syntax(raptor_parser_factory* factory,
246 const unsigned char *buffer, size_t len,
247 const unsigned char *identifier,
248 const unsigned char *suffix,
249 const char *mime_type)
250 {
251 int score=0;
252
253 if(identifier) {
254 if(strstr((const char*)identifier, "RDFa"))
255 score=10;
256 }
257
258 if(buffer && len) {
259 #define HAS_RDFA_1 (raptor_memstr((const char*)buffer, len, "-//W3C//DTD XHTML+RDFa 1.0//EN") != NULL)
260 #define HAS_RDFA_2 (raptor_memstr((const char*)buffer, len, "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd") != NULL)
261
262 if(HAS_RDFA_1 || HAS_RDFA_2)
263 score=10;
264 }
265
266 return score;
267 }
268
269
270 static int
raptor_librdfa_parser_register_factory(raptor_parser_factory * factory)271 raptor_librdfa_parser_register_factory(raptor_parser_factory *factory)
272 {
273 int rc=0;
274
275 factory->context_length = sizeof(raptor_librdfa_parser_context);
276
277 factory->need_base_uri = 0;
278
279 factory->init = raptor_librdfa_parse_init;
280 factory->terminate = raptor_librdfa_parse_terminate;
281 factory->start = raptor_librdfa_parse_start;
282 factory->chunk = raptor_librdfa_parse_chunk;
283 factory->recognise_syntax = raptor_librdfa_parse_recognise_syntax;
284
285 rc=raptor_parser_factory_add_uri(factory,
286 (const unsigned char*)"http://www.w3.org/TR/rdfa/");
287
288 return rc;
289 }
290
291
292 int
raptor_init_parser_rdfa(raptor_world * world)293 raptor_init_parser_rdfa(raptor_world* world)
294 {
295 return !raptor_parser_register_factory(world, "rdfa", "RDF/A via librdfa",
296 &raptor_librdfa_parser_register_factory);
297 }
298