/* -*- Mode: c; c-basic-offset: 2 -*-
 *
 * raptor_librdfa.c - Raptor RDFA Parser via librdfa implementation
 *
 * Copyright (C) 2008, David Beckett http://www.dajobe.org/
 *
 * This package is Free Software and part of Redland http://librdf.org/
 *
 * It is licensed under the following three licenses as alternatives:
 *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
 *   2. GNU General Public License (GPL) V2 or any newer version
 *   3. Apache License, V2.0 or any newer version
 *
 * You may not use this file except in compliance with at least one of
 * the above three licenses.
 *
 * See LICENSE.html or LICENSE.txt at the top of this package for the
 * complete terms and further detail along with the license texts for
 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
 *
 *
 */
23 
24 
25 #ifdef HAVE_CONFIG_H
26 #include <raptor_config.h>
27 #endif
28 
29 #ifdef WIN32
30 #include <win32_raptor_config.h>
31 #endif
32 
33 #include <stdio.h>
34 #include <string.h>
35 #include <ctype.h>
36 #include <stdarg.h>
37 #ifdef HAVE_ERRNO_H
38 #include <errno.h>
39 #endif
40 #ifdef HAVE_STDLIB_H
41 #include <stdlib.h>
42 #endif
43 
44 /* Raptor includes */
45 #include "raptor.h"
46 #include "raptor_internal.h"
47 
48 #include "rdfa.h"
49 #include "rdfa_utils.h"
50 
51 
52 
53 /*
54  * RDFA parser object
55  */
56 struct raptor_librdfa_parser_context_s {
57   /* librdfa object */
58   rdfacontext* context;
59 
60   /* static statement for use in passing to user code */
61   raptor_statement statement;
62 };
63 
64 
65 typedef struct raptor_librdfa_parser_context_s raptor_librdfa_parser_context;
66 
67 
68 static int
raptor_librdfa_parse_init(raptor_parser * rdf_parser,const char * name)69 raptor_librdfa_parse_init(raptor_parser* rdf_parser, const char *name)
70 {
71   /*raptor_librdfa_parser_context *librdfa_parser=(raptor_librdfa_parser_context*)rdf_parser->context; */
72   return 0;
73 }
74 
75 
76 static void
raptor_librdfa_parse_terminate(raptor_parser * rdf_parser)77 raptor_librdfa_parse_terminate(raptor_parser* rdf_parser)
78 {
79   raptor_librdfa_parser_context *librdfa_parser=(raptor_librdfa_parser_context*)rdf_parser->context;
80 
81   if(librdfa_parser->context) {
82     rdfa_parse_end(librdfa_parser->context);
83     rdfa_free_context(librdfa_parser->context);
84     librdfa_parser->context=NULL;
85   }
86 }
87 
88 
89 static void
raptor_librdfa_generate_statement(rdftriple * triple,void * callback_data)90 raptor_librdfa_generate_statement(rdftriple* triple, void* callback_data)
91 {
92   raptor_parser* parser=(raptor_parser*)callback_data;
93   raptor_statement *s=&parser->statement;
94   raptor_uri *subject_uri=NULL;
95   raptor_uri *predicate_uri=NULL;
96   raptor_uri *object_uri=NULL;
97   raptor_uri *datatype_uri=NULL;
98 
99   if(!triple->subject || !triple->predicate || !triple->object) {
100     RAPTOR_FATAL1("Triple has NULL parts\n");
101     rdfa_free_triple(triple);
102     return;
103   }
104 
105   if(triple->object_type == RDF_TYPE_NAMESPACE_PREFIX) {
106     RAPTOR_FATAL1("Triple has namespace object type\n");
107     rdfa_free_triple(triple);
108     return;
109   }
110 
111   if((triple->subject[0] == '_') && (triple->subject[1] == ':')) {
112     s->subject_type = RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
113     s->subject= (triple->subject + 2);
114   } else {
115     s->subject_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
116     subject_uri=raptor_new_uri_v2(parser->world, (const unsigned char*)triple->subject);
117     if(!subject_uri)
118       goto cleanup;
119     s->subject=subject_uri;
120   }
121 
122 
123   predicate_uri=raptor_new_uri_v2(parser->world, (const unsigned char*)triple->predicate);
124   if(!predicate_uri)
125     goto cleanup;
126   s->predicate=predicate_uri;
127   s->predicate_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
128 
129   s->object = triple->object;
130   s->object_literal_datatype=NULL;
131   s->object_literal_language=NULL;
132   if(triple->object_type == RDF_TYPE_IRI) {
133     if((triple->object[0] == '_') && (triple->object[1] == ':')) {
134       s->object_type = RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
135       s->object = (triple->object + 2);
136     } else {
137       s->object_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
138       object_uri=raptor_new_uri_v2(parser->world, (const unsigned char*)triple->object);
139       if(!object_uri)
140         goto cleanup;
141       s->object=object_uri;
142     }
143   } else if(triple->object_type == RDF_TYPE_PLAIN_LITERAL) {
144     s->object_type=RAPTOR_IDENTIFIER_TYPE_LITERAL;
145     if(triple->language)
146       s->object_literal_language=(const unsigned char*)triple->language;
147   } else if(triple->object_type == RDF_TYPE_XML_LITERAL) {
148     s->object_type = RAPTOR_IDENTIFIER_TYPE_XML_LITERAL;
149   } else if(triple->object_type == RDF_TYPE_TYPED_LITERAL) {
150     s->object_type=RAPTOR_IDENTIFIER_TYPE_LITERAL;
151     if(triple->language)
152       s->object_literal_language=(const unsigned char*)triple->language;
153     if(triple->datatype) {
154       datatype_uri=raptor_new_uri_v2(parser->world, (const unsigned char*)triple->datatype);
155       if(!datatype_uri)
156         goto cleanup;
157       s->object_literal_datatype=datatype_uri;
158       /* If datatype, no language allowed */
159       s->object_literal_language=NULL;
160     }
161   } else {
162     RAPTOR_FATAL2("Triple has unknown object type %d\n", s->object_type);
163     goto cleanup;
164   }
165 
166   if(!parser->statement_handler)
167     goto cleanup;
168 
169   /* Generate triple */
170   (*parser->statement_handler)(parser->user_data, s);
171 
172   cleanup:
173   rdfa_free_triple(triple);
174 
175   if(subject_uri)
176     raptor_free_uri_v2(parser->world, subject_uri);
177   if(predicate_uri)
178     raptor_free_uri_v2(parser->world, predicate_uri);
179   if(object_uri)
180     raptor_free_uri_v2(parser->world, object_uri);
181   if(datatype_uri)
182     raptor_free_uri_v2(parser->world, datatype_uri);
183 }
184 
185 
186 static void
raptor_librdfa_sax2_new_namespace_handler(void * user_data,raptor_namespace * nspace)187 raptor_librdfa_sax2_new_namespace_handler(void *user_data,
188                                           raptor_namespace* nspace)
189 {
190   raptor_parser* rdf_parser;
191   rdf_parser=(raptor_parser*)user_data;
192   raptor_parser_start_namespace(rdf_parser, nspace);
193 }
194 
195 
196 
197 static int
raptor_librdfa_parse_start(raptor_parser * rdf_parser)198 raptor_librdfa_parse_start(raptor_parser* rdf_parser)
199 {
200   raptor_locator *locator=&rdf_parser->locator;
201   raptor_librdfa_parser_context *librdfa_parser=(raptor_librdfa_parser_context*)rdf_parser->context;
202   int rc;
203   char* base_uri_string=NULL;
204 
205   locator->line=1;
206   locator->column=0;
207   locator->byte=0;
208 
209   if(rdf_parser->base_uri)
210     base_uri_string=(char*)raptor_uri_as_string_v2(rdf_parser->world, rdf_parser->base_uri);
211 
212   if(librdfa_parser->context)
213     rdfa_free_context(librdfa_parser->context);
214   librdfa_parser->context=rdfa_create_context(base_uri_string);
215   if(!librdfa_parser->context)
216     return 1;
217 
218   librdfa_parser->context->namespace_handler=raptor_librdfa_sax2_new_namespace_handler;
219   librdfa_parser->context->namespace_handler_user_data=rdf_parser;
220   librdfa_parser->context->error_handlers=&rdf_parser->error_handlers;
221 
222   librdfa_parser->context->callback_data=rdf_parser;
223   rdfa_set_triple_handler(librdfa_parser->context,
224                           raptor_librdfa_generate_statement);
225 
226   rc = rdfa_parse_start(librdfa_parser->context);
227   if(rc != RDFA_PARSE_SUCCESS)
228     return 1;
229 
230   return 0;
231 }
232 
233 
234 static int
raptor_librdfa_parse_chunk(raptor_parser * rdf_parser,const unsigned char * s,size_t len,int is_end)235 raptor_librdfa_parse_chunk(raptor_parser* rdf_parser,
236                            const unsigned char *s, size_t len,
237                            int is_end)
238 {
239   raptor_librdfa_parser_context *librdfa_parser=(raptor_librdfa_parser_context*)rdf_parser->context;
240   int rval=rdfa_parse_chunk(librdfa_parser->context, (char*)s, len, is_end);
241   return rval != RDFA_PARSE_SUCCESS;
242 }
243 
244 static int
raptor_librdfa_parse_recognise_syntax(raptor_parser_factory * factory,const unsigned char * buffer,size_t len,const unsigned char * identifier,const unsigned char * suffix,const char * mime_type)245 raptor_librdfa_parse_recognise_syntax(raptor_parser_factory* factory,
246                                       const unsigned char *buffer, size_t len,
247                                       const unsigned char *identifier,
248                                       const unsigned char *suffix,
249                                       const char *mime_type)
250 {
251   int score=0;
252 
253   if(identifier) {
254     if(strstr((const char*)identifier, "RDFa"))
255       score=10;
256   }
257 
258   if(buffer && len) {
259 #define  HAS_RDFA_1 (raptor_memstr((const char*)buffer, len, "-//W3C//DTD XHTML+RDFa 1.0//EN") != NULL)
260 #define  HAS_RDFA_2 (raptor_memstr((const char*)buffer, len, "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd") != NULL)
261 
262     if(HAS_RDFA_1 || HAS_RDFA_2)
263       score=10;
264   }
265 
266   return score;
267 }
268 
269 
270 static int
raptor_librdfa_parser_register_factory(raptor_parser_factory * factory)271 raptor_librdfa_parser_register_factory(raptor_parser_factory *factory)
272 {
273   int rc=0;
274 
275   factory->context_length     = sizeof(raptor_librdfa_parser_context);
276 
277   factory->need_base_uri = 0;
278 
279   factory->init      = raptor_librdfa_parse_init;
280   factory->terminate = raptor_librdfa_parse_terminate;
281   factory->start     = raptor_librdfa_parse_start;
282   factory->chunk     = raptor_librdfa_parse_chunk;
283   factory->recognise_syntax = raptor_librdfa_parse_recognise_syntax;
284 
285   rc=raptor_parser_factory_add_uri(factory,
286                                 (const unsigned char*)"http://www.w3.org/TR/rdfa/");
287 
288   return rc;
289 }
290 
291 
292 int
raptor_init_parser_rdfa(raptor_world * world)293 raptor_init_parser_rdfa(raptor_world* world)
294 {
295   return !raptor_parser_register_factory(world, "rdfa",  "RDF/A via librdfa",
296                                          &raptor_librdfa_parser_register_factory);
297 }
298