1 /* -*- Mode: c; c-basic-offset: 2 -*-
2 *
3 * raptor_abbrev.c - Code common to abbreviating serializers (ttl/rdfxmla)
4 *
5 * Copyright (C) 2006, Dave Robillard
6 * Copyright (C) 2004-2011, David Beckett http://www.dajobe.org/
7 * Copyright (C) 2004-2005, University of Bristol, UK http://www.bristol.ac.uk/
8 * Copyright (C) 2005, Steve Shepard steveshep@gmail.com
9 *
10 * This package is Free Software and part of Redland http://librdf.org/
11 *
12 * It is licensed under the following three licenses as alternatives:
13 * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
14 * 2. GNU General Public License (GPL) V2 or any newer version
15 * 3. Apache License, V2.0 or any newer version
16 *
17 * You may not use this file except in compliance with at least one of
18 * the above three licenses.
19 *
20 * See LICENSE.html or LICENSE.txt at the top of this package for the
21 * complete terms and further detail along with the license texts for
22 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
23 *
24 */
25
26 #ifdef HAVE_CONFIG_H
27 #include <raptor_config.h>
28 #endif
29
30 #include <stdio.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include <stdarg.h>
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 #ifdef HAVE_STDLIB_H
38 #include <stdlib.h>
39 #endif
40
41 /* Raptor includes */
42 #include "raptor2.h"
43 #include "raptor_internal.h"
44
45
46 /*
47 * raptor_abbrev_node implementation
48 *
49 */
50
51
52 static raptor_abbrev_subject* raptor_new_abbrev_subject(raptor_abbrev_node* node);
53
54 /**
55 * raptor_new_abbrev_node:
56 * @world: raptor world
57 * @term: term to use
58 *
59 * INTERNAL - Constructor for raptor_abbrev_node
60 *
61 * The @term is copied by the constructor.
62 *
63 * Return value: new raptor abbrev node or NULL on failure
64 **/
65 raptor_abbrev_node*
raptor_new_abbrev_node(raptor_world * world,raptor_term * term)66 raptor_new_abbrev_node(raptor_world* world, raptor_term *term)
67 {
68 raptor_abbrev_node* node = NULL;
69
70 if(term->type == RAPTOR_TERM_TYPE_UNKNOWN)
71 return NULL;
72
73 node = RAPTOR_CALLOC(raptor_abbrev_node*, 1, sizeof(*node));
74 if(node) {
75 node->world = world;
76 node->ref_count = 1;
77 node->term = raptor_term_copy(term);
78 }
79
80 return node;
81 }
82
83
84 /**
85 * raptor_new_abbrev_node:
86 * @node: raptor abbrev node
87 *
88 * INTERNAL - Destructor for raptor_abbrev_node
89 */
90 void
raptor_free_abbrev_node(raptor_abbrev_node * node)91 raptor_free_abbrev_node(raptor_abbrev_node* node)
92 {
93 RAPTOR_ASSERT_OBJECT_POINTER_RETURN(node, raptor_abbrev_node);
94
95 if(--node->ref_count)
96 return;
97
98 if(node->term)
99 raptor_free_term(node->term);
100
101 RAPTOR_FREE(raptor_abbrev_node, node);
102 }
103
104
105 /**
106 * raptor_abbrev_node_compare:
107 * @node1: node 1
108 * @node2: node 2
109 *
110 * INTERNAL - compare two raptor_abbrev_nodes.
111 *
112 * This needs to be a strong ordering for use by raptor_avltree.
113 * This is very performance critical, anything to make it faster is worth it.
114 *
115 * Return value: <0, 0 or 1 if @node1 less than, equal or greater
116 * than @node2 respectively
117 */
118 int
raptor_abbrev_node_compare(raptor_abbrev_node * node1,raptor_abbrev_node * node2)119 raptor_abbrev_node_compare(raptor_abbrev_node* node1, raptor_abbrev_node* node2)
120 {
121 if(node1 == node2)
122 return 0;
123
124 return raptor_term_compare(node1->term, node2->term);
125 }
126
127
128 /**
129 * raptor_abbrev_node_equals:
130 * @node1: node 1
131 * @node2: node 2
132 *
133 * INTERNAL - compare two raptor_abbrev_nodes for equality
134 *
135 * Return value: non-0 if nodes are equal
136 */
137 int
raptor_abbrev_node_equals(raptor_abbrev_node * node1,raptor_abbrev_node * node2)138 raptor_abbrev_node_equals(raptor_abbrev_node* node1, raptor_abbrev_node* node2)
139 {
140 return raptor_term_equals(node1->term, node2->term);
141 }
142
143
144 /**
145 * raptor_abbrev_node_lookup:
146 * @nodes: Tree of nodes to search
147 * @node: Node value to search for
148 *
149 * INTERNAL - Look in an avltree of nodes for a node described by parameters
150 * and if present create it, add it and return it
151 *
152 * Return value: the node found/created or NULL on failure
153 */
154 raptor_abbrev_node*
raptor_abbrev_node_lookup(raptor_avltree * nodes,raptor_term * term)155 raptor_abbrev_node_lookup(raptor_avltree* nodes, raptor_term* term)
156 {
157 raptor_abbrev_node *lookup_node;
158 raptor_abbrev_node *rv_node;
159
160 /* Create a temporary node for search comparison. */
161 lookup_node = raptor_new_abbrev_node(term->world, term);
162
163 if(!lookup_node)
164 return NULL;
165
166 rv_node = (raptor_abbrev_node*)raptor_avltree_search(nodes, lookup_node);
167
168 /* If not found, insert/return a new one */
169 if(!rv_node) {
170
171 if(raptor_avltree_add(nodes, lookup_node))
172 return NULL;
173 else
174 return lookup_node;
175
176 /* Found */
177 } else {
178 raptor_free_abbrev_node(lookup_node);
179 return rv_node;
180 }
181 }
182
183
184 static raptor_abbrev_node**
raptor_new_abbrev_po(raptor_abbrev_node * predicate,raptor_abbrev_node * object)185 raptor_new_abbrev_po(raptor_abbrev_node* predicate, raptor_abbrev_node* object)
186 {
187 raptor_abbrev_node** nodes = NULL;
188 nodes = RAPTOR_CALLOC(raptor_abbrev_node**, 2, sizeof(raptor_abbrev_node*));
189 if(!nodes)
190 return NULL;
191
192 nodes[0] = predicate;
193 nodes[1] = object;
194
195 return nodes;
196 }
197
198
199 static void
raptor_free_abbrev_po(raptor_abbrev_node ** nodes)200 raptor_free_abbrev_po(raptor_abbrev_node** nodes)
201 {
202 RAPTOR_ASSERT_OBJECT_POINTER_RETURN(nodes, raptor_abbrev_node_pair);
203
204 if(nodes[0])
205 raptor_free_abbrev_node(nodes[0]);
206 if(nodes[1])
207 raptor_free_abbrev_node(nodes[1]);
208
209 RAPTOR_FREE(raptor_abbrev_nodes, nodes);
210 }
211
212
213 static int
raptor_compare_abbrev_po(raptor_abbrev_node ** nodes1,raptor_abbrev_node ** nodes2)214 raptor_compare_abbrev_po(raptor_abbrev_node** nodes1,
215 raptor_abbrev_node** nodes2)
216 {
217 int d;
218 d = raptor_abbrev_node_compare(nodes1[0], nodes2[0]);
219 if(!d)
220 d = raptor_abbrev_node_compare(nodes1[1], nodes2[1]);
221
222 return d;
223 }
224
225
226 #ifdef RAPTOR_DEBUG
227 static void
raptor_print_abbrev_po(raptor_abbrev_node ** nodes,FILE * handle)228 raptor_print_abbrev_po(raptor_abbrev_node** nodes, FILE* handle)
229 {
230 raptor_abbrev_node* p = nodes[0];
231 raptor_abbrev_node* o = nodes[1];
232
233 if(p && o) {
234 fputc('[', handle);
235 raptor_term_print_as_ntriples(p->term, handle);
236 fputs(" : ", handle);
237 raptor_term_print_as_ntriples(o->term, handle);
238 fputs("]\n", handle);
239 }
240 }
241 #endif
242
243
244 /*
245 * raptor_abbrev_subject implementation
246 *
247 * The subject of triples, with all predicates and values
248 * linked from them.
249 *
250 **/
251
252
253 static raptor_abbrev_subject*
raptor_new_abbrev_subject(raptor_abbrev_node * node)254 raptor_new_abbrev_subject(raptor_abbrev_node* node)
255 {
256 raptor_abbrev_subject* subject;
257
258 if(!(node->term->type == RAPTOR_TERM_TYPE_URI ||
259 node->term->type == RAPTOR_TERM_TYPE_BLANK)) {
260 raptor_log_error(node->world, RAPTOR_LOG_LEVEL_ERROR, NULL,
261 "Subject node is type %d not a uri or blank node");
262 return NULL;
263 }
264
265 subject = RAPTOR_CALLOC(raptor_abbrev_subject*, 1, sizeof(*subject));
266
267 if(subject) {
268 subject->node = node;
269 subject->node->ref_count++;
270 subject->node->count_as_subject++;
271
272 subject->node_type = NULL;
273
274 subject->valid = 1;
275
276 subject->properties =
277 raptor_new_avltree((raptor_data_compare_handler)raptor_compare_abbrev_po,
278 (raptor_data_free_handler)raptor_free_abbrev_po,
279 0);
280 #ifdef RAPTOR_DEBUG
281 if(subject->properties)
282 raptor_avltree_set_print_handler(subject->properties,
283 (raptor_data_print_handler)raptor_print_abbrev_po);
284 #endif
285
286 subject->list_items =
287 raptor_new_sequence((raptor_data_free_handler)raptor_free_abbrev_node, NULL);
288
289 if(!subject->properties || !subject->list_items) {
290 raptor_free_abbrev_subject(subject);
291 subject = NULL;
292 }
293
294 }
295
296 return subject;
297 }
298
299
300 void
raptor_free_abbrev_subject(raptor_abbrev_subject * subject)301 raptor_free_abbrev_subject(raptor_abbrev_subject* subject)
302 {
303 RAPTOR_ASSERT_OBJECT_POINTER_RETURN(subject, raptor_abbrev_subject);
304
305 if(subject->node)
306 raptor_free_abbrev_node(subject->node);
307
308 if(subject->node_type)
309 raptor_free_abbrev_node(subject->node_type);
310
311 if(subject->properties)
312 raptor_free_avltree(subject->properties);
313
314 if(subject->list_items)
315 raptor_free_sequence(subject->list_items);
316
317 RAPTOR_FREE(raptor_subject, subject);
318 }
319
320
321 int
raptor_abbrev_subject_valid(raptor_abbrev_subject * subject)322 raptor_abbrev_subject_valid(raptor_abbrev_subject *subject)
323 {
324 return subject->valid;
325 }
326
327
328 int
raptor_abbrev_subject_invalidate(raptor_abbrev_subject * subject)329 raptor_abbrev_subject_invalidate(raptor_abbrev_subject *subject)
330 {
331 subject->valid = 0;
332 return 0;
333 }
334
335
336
337
338 /**
339 * raptor_subject_add_property:
340 * @subject: subject node to add to
341 * @predicate: predicate node
342 * @object: object node
343 *
344 * INTERNAL - Add predicate/object pair into properties array of a subject node.
345 *
346 * The subject node takes ownership of the predicate/object nodes.
347 * On error, predicate/object are freed immediately.
348 *
349 * Return value: <0 on failure, >0 if pair is a duplicate and it was not added
350 **/
351 int
raptor_abbrev_subject_add_property(raptor_abbrev_subject * subject,raptor_abbrev_node * predicate,raptor_abbrev_node * object)352 raptor_abbrev_subject_add_property(raptor_abbrev_subject* subject,
353 raptor_abbrev_node* predicate,
354 raptor_abbrev_node* object)
355 {
356 int err;
357 raptor_abbrev_node** nodes;
358
359 nodes = raptor_new_abbrev_po(predicate, object);
360 if(!nodes)
361 return -1;
362
363 predicate->ref_count++;
364 object->ref_count++;
365
366 if(raptor_avltree_search(subject->properties, nodes)) {
367 /* Already present - do not add a duplicate triple (s->[p o]) */
368 raptor_free_abbrev_po(nodes);
369 return 1;
370 }
371
372 #if 0
373 fprintf(stderr, "Adding P,O ");
374 raptor_print_abbrev_po(stderr, nodes);
375
376 raptor_avltree_dump(subject->properties, stderr);
377 #endif
378 err = raptor_avltree_add(subject->properties, nodes);
379 if(err)
380 return -1;
381 #if 0
382 fprintf(stderr, "Result ");
383 raptor_avltree_print(subject->properties, stderr);
384
385 raptor_avltree_dump(subject->properties, stderr);
386
387 raptor_avltree_check(subject->properties);
388
389 fprintf(stderr, "\n\n");
390 #endif
391
392 return 0;
393 }
394
395
396 int
raptor_abbrev_subject_compare(raptor_abbrev_subject * subject1,raptor_abbrev_subject * subject2)397 raptor_abbrev_subject_compare(raptor_abbrev_subject* subject1,
398 raptor_abbrev_subject* subject2)
399 {
400 return raptor_abbrev_node_compare(subject1->node, subject2->node);
401 }
402
403
404 /**
405 * raptor_abbrev_subject_find:
406 * @subjects: AVL-Tree of subject nodes
407 * @term: node to find
408 *
409 * INTERNAL - Find a subject node in an AVL-Tree of subject nodes
410 *
411 * Return value: node or NULL if not found or failure
412 */
413 raptor_abbrev_subject*
raptor_abbrev_subject_find(raptor_avltree * subjects,raptor_term * node)414 raptor_abbrev_subject_find(raptor_avltree *subjects, raptor_term* node)
415 {
416 raptor_abbrev_subject* rv_subject = NULL;
417 raptor_abbrev_node* lookup_node = NULL;
418 raptor_abbrev_subject* lookup = NULL;
419
420 /* datatype and language are both NULL for a subject node */
421
422 lookup_node = raptor_new_abbrev_node(node->world, node);
423 if(!lookup_node)
424 return NULL;
425
426 lookup = raptor_new_abbrev_subject(lookup_node);
427 if(!lookup) {
428 raptor_free_abbrev_node(lookup_node);
429 return NULL;
430 }
431
432 rv_subject = (raptor_abbrev_subject*) raptor_avltree_search(subjects, lookup);
433
434 raptor_free_abbrev_subject(lookup);
435 raptor_free_abbrev_node(lookup_node);
436
437 return rv_subject;
438 }
439
440
441 /**
442 * raptor_abbrev_subject_lookup:
443 * @nodes: AVL-Tree of subject nodes
444 * @subjects: AVL-Tree of URI-subject nodes
445 * @blanks: AVL-Tree of blank-subject nodes
446 * @term: node to find
447 *
448 * INTERNAL - Find a subject node in the appropriate uri/blank AVL-Tree of subject nodes or add it
449 *
450 * Return value: node or NULL on failure
451 */
452 raptor_abbrev_subject*
raptor_abbrev_subject_lookup(raptor_avltree * nodes,raptor_avltree * subjects,raptor_avltree * blanks,raptor_term * term)453 raptor_abbrev_subject_lookup(raptor_avltree* nodes,
454 raptor_avltree* subjects, raptor_avltree* blanks,
455 raptor_term* term)
456 {
457 raptor_avltree *tree;
458 raptor_abbrev_subject* rv_subject;
459
460 /* Search for specified resource. */
461 tree = (term->type == RAPTOR_TERM_TYPE_BLANK) ? blanks : subjects;
462 rv_subject = raptor_abbrev_subject_find(tree, term);
463
464 /* If not found, create one and insert it */
465 if(!rv_subject) {
466 raptor_abbrev_node* node = raptor_abbrev_node_lookup(nodes, term);
467 if(node) {
468 rv_subject = raptor_new_abbrev_subject(node);
469 if(rv_subject) {
470 if(raptor_avltree_add(tree, rv_subject)) {
471 rv_subject = NULL;
472 }
473 }
474 }
475 }
476
477 return rv_subject;
478 }
479
480
481 #ifdef ABBREV_DEBUG
482 void
raptor_print_subject(raptor_abbrev_subject * subject)483 raptor_print_subject(raptor_abbrev_subject* subject)
484 {
485 int i;
486 unsigned char *subj;
487 unsigned char *pred;
488 unsigned char *obj;
489 raptor_avltree_iterator* iter = NULL;
490
491 /* Note: The raptor_abbrev_node field passed as the first argument for
492 * raptor_term_to_string() is somewhat arbitrary, since as
493 * the data structure is designed, the first word in the value union
494 * is what was passed as the subject/predicate/object of the
495 * statement.
496 */
497 subj = raptor_term_to_string(subject);
498
499 if(subject->type) {
500 obj = raptor_term_to_string(subject);
501 fprintf(stderr,"[%s, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, %s]\n", subj, obj);
502 RAPTOR_FREE(char*, obj);
503 }
504
505 for(i = 0; i < raptor_sequence_size(subject->elements); i++) {
506
507 raptor_abbrev_node* o = raptor_sequence_get_at(subject->elements, i);
508 if(o) {
509 obj = raptor_term_to_string(o);
510 fprintf(stderr,"[%s, [rdf:_%d], %s]\n", subj, i, obj);
511 RAPTOR_FREE(char*, obj);
512 }
513
514 }
515
516
517 iter = raptor_new_avltree_iterator(subject->properties, NULL, NULL, 1);
518 while(iter) {
519 raptor_abbrev_node** nodes;
520 nodes = (raptor_abbrev_node**)raptor_avltree_iterator_get(iter);
521 if(!nodes)
522 break;
523 raptor_print_abbrev_po(stderr, nodes);
524
525 if(raptor_avltree_iterator_next(iter))
526 break;
527 }
528 if(iter)
529 raptor_free_avltree_iterator(iter);
530
531 RAPTOR_FREE(char*, subj);
532
533 }
534 #endif
535
536
537 /* helper functions */
538
539 /**
540 * raptor_new_qname_from_resource:
541 * @namespaces: sequence of namespaces (corresponding to nstack)
542 * @nstack: #raptor_namespace_stack to use/update
543 * @namespace_count: size of nstack (may be modified)
544 * @node: #raptor_abbrev_node to use
545 *
546 * INTERNAL - Make an XML QName from the URI associated with the node.
547 *
548 * Return value: the QName or NULL on failure
549 **/
550 raptor_qname*
raptor_new_qname_from_resource(raptor_sequence * namespaces,raptor_namespace_stack * nstack,int * namespace_count,raptor_abbrev_node * node)551 raptor_new_qname_from_resource(raptor_sequence* namespaces,
552 raptor_namespace_stack* nstack,
553 int* namespace_count,
554 raptor_abbrev_node* node)
555 {
556 unsigned char* name = NULL; /* where to split predicate name */
557 size_t name_len = 1;
558 unsigned char *uri_string;
559 size_t uri_len;
560 unsigned char *p;
561 raptor_uri *ns_uri;
562 raptor_namespace *ns;
563 raptor_qname *qname;
564 unsigned char *ns_uri_string;
565 size_t ns_uri_string_len;
566
567 if(node->term->type != RAPTOR_TERM_TYPE_URI) {
568 #ifdef RAPTOR_DEBUG
569 RAPTOR_FATAL1("Node must be a URI\n");
570 #endif
571 return NULL;
572 }
573
574 qname = raptor_new_qname_from_namespace_uri(nstack, node->term->value.uri, 10);
575 if(qname)
576 return qname;
577
578 uri_string = raptor_uri_as_counted_string(node->term->value.uri, &uri_len);
579
580 p= uri_string;
581 name_len = uri_len;
582 while(name_len >0) {
583 if(raptor_xml_name_check(p, name_len, 10)) {
584 name = p;
585 break;
586 }
587 p++; name_len--;
588 }
589
590 if(!name || (name == uri_string))
591 return NULL;
592
593 ns_uri_string_len = uri_len - name_len;
594 ns_uri_string = RAPTOR_MALLOC(unsigned char*, ns_uri_string_len + 1);
595 if(!ns_uri_string)
596 return NULL;
597 memcpy(ns_uri_string, (const char*)uri_string, ns_uri_string_len);
598 ns_uri_string[ns_uri_string_len] = '\0';
599
600 ns_uri = raptor_new_uri_from_counted_string(node->world, ns_uri_string,
601 ns_uri_string_len);
602 RAPTOR_FREE(char*, ns_uri_string);
603
604 if(!ns_uri)
605 return NULL;
606
607 ns = raptor_namespaces_find_namespace_by_uri(nstack, ns_uri);
608 if(!ns) {
609 /* The namespace was not declared, so create one */
610 unsigned char prefix[2 + MAX_ASCII_INT_SIZE + 1];
611 (*namespace_count)++;
612 prefix[0] = 'n';
613 prefix[1] = 's';
614 (void)raptor_format_integer(RAPTOR_GOOD_CAST(char*,&prefix[2]),
615 MAX_ASCII_INT_SIZE + 1, *namespace_count,
616 /* base */ 10, -1, '\0');
617
618 ns = raptor_new_namespace_from_uri(nstack, prefix, ns_uri, 0);
619
620 /* We'll most likely need this namespace again. Push it on our
621 * stack. It will be deleted in raptor_rdfxmla_serialize_terminate()
622 */
623 if(raptor_sequence_push(namespaces, ns)) {
624 /* namespaces sequence has no free handler so we have to free
625 * the ns ourselves on error
626 */
627 raptor_free_namespace(ns);
628 raptor_free_uri(ns_uri);
629 return NULL;
630 }
631 }
632
633 qname = raptor_new_qname_from_namespace_local_name(node->world, ns, name,
634 NULL);
635
636 raptor_free_uri(ns_uri);
637
638 return qname;
639 }
640