1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * raptor_abbrev.c - Code common to abbreviating serializers (ttl/rdfxmla)
4  *
5  * Copyright (C) 2006, Dave Robillard
6  * Copyright (C) 2004-2011, David Beckett http://www.dajobe.org/
7  * Copyright (C) 2004-2005, University of Bristol, UK http://www.bristol.ac.uk/
8  * Copyright (C) 2005, Steve Shepard steveshep@gmail.com
9  *
10  * This package is Free Software and part of Redland http://librdf.org/
11  *
12  * It is licensed under the following three licenses as alternatives:
13  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
14  *   2. GNU General Public License (GPL) V2 or any newer version
15  *   3. Apache License, V2.0 or any newer version
16  *
17  * You may not use this file except in compliance with at least one of
18  * the above three licenses.
19  *
20  * See LICENSE.html or LICENSE.txt at the top of this package for the
21  * complete terms and further detail along with the license texts for
22  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
23  *
24  */
25 
26 #ifdef HAVE_CONFIG_H
27 #include <raptor_config.h>
28 #endif
29 
30 #include <stdio.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include <stdarg.h>
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 #ifdef HAVE_STDLIB_H
38 #include <stdlib.h>
39 #endif
40 
41 /* Raptor includes */
42 #include "raptor2.h"
43 #include "raptor_internal.h"
44 
45 
46 /*
47  * raptor_abbrev_node implementation
48  *
49  */
50 
51 
52 static raptor_abbrev_subject* raptor_new_abbrev_subject(raptor_abbrev_node* node);
53 
54 /**
55  * raptor_new_abbrev_node:
56  * @world: raptor world
57  * @term: term to use
58  *
59  * INTERNAL - Constructor for raptor_abbrev_node
60  *
61  * The @term is copied by the constructor.
62  *
63  * Return value: new raptor abbrev node or NULL on failure
64  **/
65 raptor_abbrev_node*
raptor_new_abbrev_node(raptor_world * world,raptor_term * term)66 raptor_new_abbrev_node(raptor_world* world, raptor_term *term)
67 {
68   raptor_abbrev_node* node = NULL;
69 
70   if(term->type == RAPTOR_TERM_TYPE_UNKNOWN)
71     return NULL;
72 
73   node = RAPTOR_CALLOC(raptor_abbrev_node*, 1, sizeof(*node));
74   if(node) {
75     node->world = world;
76     node->ref_count = 1;
77     node->term = raptor_term_copy(term);
78   }
79 
80   return node;
81 }
82 
83 
84 /**
85  * raptor_new_abbrev_node:
86  * @node: raptor abbrev node
87  *
88  * INTERNAL - Destructor for raptor_abbrev_node
89  */
90 void
raptor_free_abbrev_node(raptor_abbrev_node * node)91 raptor_free_abbrev_node(raptor_abbrev_node* node)
92 {
93   RAPTOR_ASSERT_OBJECT_POINTER_RETURN(node, raptor_abbrev_node);
94 
95   if(--node->ref_count)
96     return;
97 
98   if(node->term)
99     raptor_free_term(node->term);
100 
101   RAPTOR_FREE(raptor_abbrev_node, node);
102 }
103 
104 
105 /**
106  * raptor_abbrev_node_compare:
107  * @node1: node 1
108  * @node2: node 2
109  *
110  * INTERNAL - compare two raptor_abbrev_nodes.
111  *
112  * This needs to be a strong ordering for use by raptor_avltree.
113  * This is very performance critical, anything to make it faster is worth it.
114  *
115  * Return value: <0, 0 or 1 if @node1 less than, equal or greater
116  * than @node2 respectively
117  */
118 int
raptor_abbrev_node_compare(raptor_abbrev_node * node1,raptor_abbrev_node * node2)119 raptor_abbrev_node_compare(raptor_abbrev_node* node1, raptor_abbrev_node* node2)
120 {
121   if(node1 == node2)
122     return 0;
123 
124   return raptor_term_compare(node1->term, node2->term);
125 }
126 
127 
128 /**
129  * raptor_abbrev_node_equals:
130  * @node1: node 1
131  * @node2: node 2
132  *
133  * INTERNAL - compare two raptor_abbrev_nodes for equality
134  *
135  * Return value: non-0 if nodes are equal
136  */
137 int
raptor_abbrev_node_equals(raptor_abbrev_node * node1,raptor_abbrev_node * node2)138 raptor_abbrev_node_equals(raptor_abbrev_node* node1, raptor_abbrev_node* node2)
139 {
140   return raptor_term_equals(node1->term, node2->term);
141 }
142 
143 
144 /**
145  * raptor_abbrev_node_lookup:
146  * @nodes: Tree of nodes to search
147  * @node: Node value to search for
148  *
149  * INTERNAL - Look in an avltree of nodes for a node described by parameters
150  *   and if present create it, add it and return it
151  *
152  * Return value: the node found/created or NULL on failure
153  */
154 raptor_abbrev_node*
raptor_abbrev_node_lookup(raptor_avltree * nodes,raptor_term * term)155 raptor_abbrev_node_lookup(raptor_avltree* nodes, raptor_term* term)
156 {
157   raptor_abbrev_node *lookup_node;
158   raptor_abbrev_node *rv_node;
159 
160   /* Create a temporary node for search comparison. */
161   lookup_node = raptor_new_abbrev_node(term->world, term);
162 
163   if(!lookup_node)
164     return NULL;
165 
166   rv_node = (raptor_abbrev_node*)raptor_avltree_search(nodes, lookup_node);
167 
168   /* If not found, insert/return a new one */
169   if(!rv_node) {
170 
171     if(raptor_avltree_add(nodes, lookup_node))
172       return NULL;
173     else
174       return lookup_node;
175 
176   /* Found */
177   } else {
178     raptor_free_abbrev_node(lookup_node);
179     return rv_node;
180   }
181 }
182 
183 
184 static raptor_abbrev_node**
raptor_new_abbrev_po(raptor_abbrev_node * predicate,raptor_abbrev_node * object)185 raptor_new_abbrev_po(raptor_abbrev_node* predicate, raptor_abbrev_node* object)
186 {
187   raptor_abbrev_node** nodes = NULL;
188   nodes = RAPTOR_CALLOC(raptor_abbrev_node**, 2, sizeof(raptor_abbrev_node*));
189   if(!nodes)
190     return NULL;
191 
192   nodes[0] = predicate;
193   nodes[1] = object;
194 
195   return nodes;
196 }
197 
198 
199 static void
raptor_free_abbrev_po(raptor_abbrev_node ** nodes)200 raptor_free_abbrev_po(raptor_abbrev_node** nodes)
201 {
202   RAPTOR_ASSERT_OBJECT_POINTER_RETURN(nodes, raptor_abbrev_node_pair);
203 
204   if(nodes[0])
205     raptor_free_abbrev_node(nodes[0]);
206   if(nodes[1])
207     raptor_free_abbrev_node(nodes[1]);
208 
209   RAPTOR_FREE(raptor_abbrev_nodes, nodes);
210 }
211 
212 
213 static int
raptor_compare_abbrev_po(raptor_abbrev_node ** nodes1,raptor_abbrev_node ** nodes2)214 raptor_compare_abbrev_po(raptor_abbrev_node** nodes1,
215                          raptor_abbrev_node** nodes2)
216 {
217   int d;
218   d = raptor_abbrev_node_compare(nodes1[0], nodes2[0]);
219   if(!d)
220     d = raptor_abbrev_node_compare(nodes1[1], nodes2[1]);
221 
222   return d;
223 }
224 
225 
226 #ifdef RAPTOR_DEBUG
227 static void
raptor_print_abbrev_po(raptor_abbrev_node ** nodes,FILE * handle)228 raptor_print_abbrev_po(raptor_abbrev_node** nodes, FILE* handle)
229 {
230   raptor_abbrev_node* p = nodes[0];
231   raptor_abbrev_node* o = nodes[1];
232 
233   if(p && o) {
234     fputc('[', handle);
235     raptor_term_print_as_ntriples(p->term, handle);
236     fputs(" : ", handle);
237     raptor_term_print_as_ntriples(o->term, handle);
238     fputs("]\n", handle);
239   }
240 }
241 #endif
242 
243 
244 /*
245  * raptor_abbrev_subject implementation
246  *
247  * The subject of triples, with all predicates and values
248  * linked from them.
249  *
250  **/
251 
252 
253 static raptor_abbrev_subject*
raptor_new_abbrev_subject(raptor_abbrev_node * node)254 raptor_new_abbrev_subject(raptor_abbrev_node* node)
255 {
256   raptor_abbrev_subject* subject;
257 
258   if(!(node->term->type == RAPTOR_TERM_TYPE_URI ||
259        node->term->type == RAPTOR_TERM_TYPE_BLANK)) {
260     raptor_log_error(node->world, RAPTOR_LOG_LEVEL_ERROR, NULL,
261                      "Subject node is type %d not a uri or blank node");
262     return NULL;
263   }
264 
265   subject = RAPTOR_CALLOC(raptor_abbrev_subject*, 1, sizeof(*subject));
266 
267   if(subject) {
268     subject->node = node;
269     subject->node->ref_count++;
270     subject->node->count_as_subject++;
271 
272     subject->node_type = NULL;
273 
274     subject->valid = 1;
275 
276     subject->properties =
277       raptor_new_avltree((raptor_data_compare_handler)raptor_compare_abbrev_po,
278                          (raptor_data_free_handler)raptor_free_abbrev_po,
279                          0);
280 #ifdef RAPTOR_DEBUG
281     if(subject->properties)
282       raptor_avltree_set_print_handler(subject->properties,
283                                        (raptor_data_print_handler)raptor_print_abbrev_po);
284 #endif
285 
286     subject->list_items =
287       raptor_new_sequence((raptor_data_free_handler)raptor_free_abbrev_node, NULL);
288 
289     if(!subject->properties || !subject->list_items) {
290       raptor_free_abbrev_subject(subject);
291       subject = NULL;
292     }
293 
294   }
295 
296   return subject;
297 }
298 
299 
300 void
raptor_free_abbrev_subject(raptor_abbrev_subject * subject)301 raptor_free_abbrev_subject(raptor_abbrev_subject* subject)
302 {
303   RAPTOR_ASSERT_OBJECT_POINTER_RETURN(subject, raptor_abbrev_subject);
304 
305   if(subject->node)
306     raptor_free_abbrev_node(subject->node);
307 
308   if(subject->node_type)
309     raptor_free_abbrev_node(subject->node_type);
310 
311   if(subject->properties)
312     raptor_free_avltree(subject->properties);
313 
314   if(subject->list_items)
315     raptor_free_sequence(subject->list_items);
316 
317   RAPTOR_FREE(raptor_subject, subject);
318 }
319 
320 
321 int
raptor_abbrev_subject_valid(raptor_abbrev_subject * subject)322 raptor_abbrev_subject_valid(raptor_abbrev_subject *subject)
323 {
324   return subject->valid;
325 }
326 
327 
328 int
raptor_abbrev_subject_invalidate(raptor_abbrev_subject * subject)329 raptor_abbrev_subject_invalidate(raptor_abbrev_subject *subject)
330 {
331   subject->valid = 0;
332   return 0;
333 }
334 
335 
336 
337 
338 /**
339  * raptor_subject_add_property:
340  * @subject: subject node to add to
341  * @predicate: predicate node
342  * @object: object node
343  *
344  * INTERNAL - Add predicate/object pair into properties array of a subject node.
345  *
346  * The subject node takes ownership of the predicate/object nodes.
347  * On error, predicate/object are freed immediately.
348  *
349  * Return value: <0 on failure, >0 if pair is a duplicate and it was not added
350  **/
351 int
raptor_abbrev_subject_add_property(raptor_abbrev_subject * subject,raptor_abbrev_node * predicate,raptor_abbrev_node * object)352 raptor_abbrev_subject_add_property(raptor_abbrev_subject* subject,
353                                    raptor_abbrev_node* predicate,
354                                    raptor_abbrev_node* object)
355 {
356   int err;
357   raptor_abbrev_node** nodes;
358 
359   nodes = raptor_new_abbrev_po(predicate, object);
360   if(!nodes)
361     return -1;
362 
363   predicate->ref_count++;
364   object->ref_count++;
365 
366   if(raptor_avltree_search(subject->properties, nodes)) {
367     /* Already present - do not add a duplicate triple (s->[p o]) */
368     raptor_free_abbrev_po(nodes);
369     return 1;
370   }
371 
372 #if 0
373   fprintf(stderr, "Adding P,O ");
374   raptor_print_abbrev_po(stderr, nodes);
375 
376   raptor_avltree_dump(subject->properties, stderr);
377 #endif
378   err = raptor_avltree_add(subject->properties, nodes);
379   if(err)
380     return -1;
381 #if 0
382   fprintf(stderr, "Result ");
383   raptor_avltree_print(subject->properties, stderr);
384 
385   raptor_avltree_dump(subject->properties, stderr);
386 
387   raptor_avltree_check(subject->properties);
388 
389   fprintf(stderr, "\n\n");
390 #endif
391 
392   return 0;
393 }
394 
395 
396 int
raptor_abbrev_subject_compare(raptor_abbrev_subject * subject1,raptor_abbrev_subject * subject2)397 raptor_abbrev_subject_compare(raptor_abbrev_subject* subject1,
398                               raptor_abbrev_subject* subject2)
399 {
400   return raptor_abbrev_node_compare(subject1->node, subject2->node);
401 }
402 
403 
404 /**
405  * raptor_abbrev_subject_find:
406  * @subjects: AVL-Tree of subject nodes
407  * @term: node to find
408  *
409  * INTERNAL - Find a subject node in an AVL-Tree of subject nodes
410  *
411  * Return value: node or NULL if not found or failure
412  */
413 raptor_abbrev_subject*
raptor_abbrev_subject_find(raptor_avltree * subjects,raptor_term * node)414 raptor_abbrev_subject_find(raptor_avltree *subjects, raptor_term* node)
415 {
416   raptor_abbrev_subject* rv_subject = NULL;
417   raptor_abbrev_node* lookup_node = NULL;
418   raptor_abbrev_subject* lookup = NULL;
419 
420   /* datatype and language are both NULL for a subject node */
421 
422   lookup_node = raptor_new_abbrev_node(node->world, node);
423   if(!lookup_node)
424     return NULL;
425 
426   lookup = raptor_new_abbrev_subject(lookup_node);
427   if(!lookup) {
428     raptor_free_abbrev_node(lookup_node);
429     return NULL;
430   }
431 
432   rv_subject = (raptor_abbrev_subject*) raptor_avltree_search(subjects, lookup);
433 
434   raptor_free_abbrev_subject(lookup);
435   raptor_free_abbrev_node(lookup_node);
436 
437   return rv_subject;
438 }
439 
440 
441 /**
442  * raptor_abbrev_subject_lookup:
443  * @nodes: AVL-Tree of subject nodes
444  * @subjects: AVL-Tree of URI-subject nodes
445  * @blanks: AVL-Tree of blank-subject nodes
446  * @term: node to find
447  *
448  * INTERNAL - Find a subject node in the appropriate uri/blank AVL-Tree of subject nodes or add it
449  *
450  * Return value: node or NULL on failure
451  */
452 raptor_abbrev_subject*
raptor_abbrev_subject_lookup(raptor_avltree * nodes,raptor_avltree * subjects,raptor_avltree * blanks,raptor_term * term)453 raptor_abbrev_subject_lookup(raptor_avltree* nodes,
454                              raptor_avltree* subjects, raptor_avltree* blanks,
455                              raptor_term* term)
456 {
457   raptor_avltree *tree;
458   raptor_abbrev_subject* rv_subject;
459 
460   /* Search for specified resource. */
461   tree = (term->type == RAPTOR_TERM_TYPE_BLANK) ? blanks : subjects;
462   rv_subject = raptor_abbrev_subject_find(tree, term);
463 
464   /* If not found, create one and insert it */
465   if(!rv_subject) {
466     raptor_abbrev_node* node = raptor_abbrev_node_lookup(nodes, term);
467     if(node) {
468       rv_subject = raptor_new_abbrev_subject(node);
469       if(rv_subject) {
470         if(raptor_avltree_add(tree, rv_subject)) {
471           rv_subject = NULL;
472         }
473       }
474     }
475   }
476 
477   return rv_subject;
478 }
479 
480 
481 #ifdef ABBREV_DEBUG
482 void
raptor_print_subject(raptor_abbrev_subject * subject)483 raptor_print_subject(raptor_abbrev_subject* subject)
484 {
485   int i;
486   unsigned char *subj;
487   unsigned char *pred;
488   unsigned char *obj;
489   raptor_avltree_iterator* iter = NULL;
490 
491   /* Note: The raptor_abbrev_node field passed as the first argument for
492    * raptor_term_to_string() is somewhat arbitrary, since as
493    * the data structure is designed, the first word in the value union
494    * is what was passed as the subject/predicate/object of the
495    * statement.
496    */
497   subj = raptor_term_to_string(subject);
498 
499   if(subject->type) {
500     obj = raptor_term_to_string(subject);
501     fprintf(stderr,"[%s, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, %s]\n", subj, obj);
502     RAPTOR_FREE(char*, obj);
503   }
504 
505   for(i = 0; i < raptor_sequence_size(subject->elements); i++) {
506 
507     raptor_abbrev_node* o = raptor_sequence_get_at(subject->elements, i);
508     if(o) {
509       obj = raptor_term_to_string(o);
510       fprintf(stderr,"[%s, [rdf:_%d], %s]\n", subj, i, obj);
511       RAPTOR_FREE(char*, obj);
512     }
513 
514   }
515 
516 
517   iter = raptor_new_avltree_iterator(subject->properties, NULL, NULL, 1);
518   while(iter) {
519     raptor_abbrev_node** nodes;
520     nodes = (raptor_abbrev_node**)raptor_avltree_iterator_get(iter);
521     if(!nodes)
522       break;
523     raptor_print_abbrev_po(stderr, nodes);
524 
525     if(raptor_avltree_iterator_next(iter))
526       break;
527   }
528   if(iter)
529     raptor_free_avltree_iterator(iter);
530 
531   RAPTOR_FREE(char*, subj);
532 
533 }
534 #endif
535 
536 
537 /* helper functions */
538 
539 /**
540  * raptor_new_qname_from_resource:
541  * @namespaces: sequence of namespaces (corresponding to nstack)
542  * @nstack: #raptor_namespace_stack to use/update
543  * @namespace_count: size of nstack (may be modified)
544  * @node: #raptor_abbrev_node to use
545  *
546  * INTERNAL - Make an XML QName from the URI associated with the node.
547  *
548  * Return value: the QName or NULL on failure
549  **/
550 raptor_qname*
raptor_new_qname_from_resource(raptor_sequence * namespaces,raptor_namespace_stack * nstack,int * namespace_count,raptor_abbrev_node * node)551 raptor_new_qname_from_resource(raptor_sequence* namespaces,
552                                raptor_namespace_stack* nstack,
553                                int* namespace_count,
554                                raptor_abbrev_node* node)
555 {
556   unsigned char* name = NULL;  /* where to split predicate name */
557   size_t name_len = 1;
558   unsigned char *uri_string;
559   size_t uri_len;
560   unsigned char *p;
561   raptor_uri *ns_uri;
562   raptor_namespace *ns;
563   raptor_qname *qname;
564   unsigned char *ns_uri_string;
565   size_t ns_uri_string_len;
566 
567   if(node->term->type != RAPTOR_TERM_TYPE_URI) {
568 #ifdef RAPTOR_DEBUG
569     RAPTOR_FATAL1("Node must be a URI\n");
570 #endif
571     return NULL;
572   }
573 
574   qname = raptor_new_qname_from_namespace_uri(nstack, node->term->value.uri, 10);
575   if(qname)
576     return qname;
577 
578   uri_string = raptor_uri_as_counted_string(node->term->value.uri, &uri_len);
579 
580   p= uri_string;
581   name_len = uri_len;
582   while(name_len >0) {
583     if(raptor_xml_name_check(p, name_len, 10)) {
584       name = p;
585       break;
586     }
587     p++; name_len--;
588   }
589 
590   if(!name || (name == uri_string))
591     return NULL;
592 
593   ns_uri_string_len = uri_len - name_len;
594   ns_uri_string = RAPTOR_MALLOC(unsigned char*, ns_uri_string_len + 1);
595   if(!ns_uri_string)
596     return NULL;
597   memcpy(ns_uri_string, (const char*)uri_string, ns_uri_string_len);
598   ns_uri_string[ns_uri_string_len] = '\0';
599 
600   ns_uri = raptor_new_uri_from_counted_string(node->world, ns_uri_string,
601                                               ns_uri_string_len);
602   RAPTOR_FREE(char*, ns_uri_string);
603 
604   if(!ns_uri)
605     return NULL;
606 
607   ns = raptor_namespaces_find_namespace_by_uri(nstack, ns_uri);
608   if(!ns) {
609     /* The namespace was not declared, so create one */
610     unsigned char prefix[2 + MAX_ASCII_INT_SIZE + 1];
611     (*namespace_count)++;
612     prefix[0] = 'n';
613     prefix[1] = 's';
614     (void)raptor_format_integer(RAPTOR_GOOD_CAST(char*,&prefix[2]),
615                                 MAX_ASCII_INT_SIZE + 1, *namespace_count,
616                                 /* base */ 10, -1, '\0');
617 
618     ns = raptor_new_namespace_from_uri(nstack, prefix, ns_uri, 0);
619 
620     /* We'll most likely need this namespace again. Push it on our
621      * stack.  It will be deleted in raptor_rdfxmla_serialize_terminate()
622      */
623     if(raptor_sequence_push(namespaces, ns)) {
624       /* namespaces sequence has no free handler so we have to free
625        * the ns ourselves on error
626        */
627       raptor_free_namespace(ns);
628       raptor_free_uri(ns_uri);
629       return NULL;
630     }
631   }
632 
633   qname = raptor_new_qname_from_namespace_local_name(node->world, ns, name,
634                                                      NULL);
635 
636   raptor_free_uri(ns_uri);
637 
638   return qname;
639 }
640