1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * rdfdiff.c - Raptor RDF diff tool
4  *
5  * Copyright (C) 2000-2008, David Beckett http://www.dajobe.org/
6  * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
7  * Copyright (C) 2005, Steve Shepard steveshep@gmail.com
8  *
9  * This package is Free Software and part of Redland http://librdf.org/
10  *
11  * It is licensed under the following three licenses as alternatives:
12  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
13  *   2. GNU General Public License (GPL) V2 or any newer version
14  *   3. Apache License, V2.0 or any newer version
15  *
16  * You may not use this file except in compliance with at least one of
17  * the above three licenses.
18  *
19  * See LICENSE.html or LICENSE.txt at the top of this package for the
20  * complete terms and further detail along with the license texts for
21  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
22  *
23  *
24  */
25 
26 
27 #ifdef HAVE_CONFIG_H
28 #include <raptor_config.h>
29 #endif
30 
31 #ifdef WIN32
32 #include <win32_raptor_config.h>
33 #endif
34 
35 #include <stdio.h>
36 #include <string.h>
37 
38 /* Raptor includes */
39 #include <raptor.h>
40 #include <raptor_internal.h>
41 
42 /* for access() and R_OK */
43 #ifdef HAVE_STDLIB_H
44 #include <stdlib.h>
45 #endif
46 
47 /* many places for getopt */
48 #ifdef HAVE_GETOPT_H
49 #include <getopt.h>
50 #else
51 #include <raptor_getopt.h>
52 #endif
53 #ifdef HAVE_UNISTD_H
54 #include <unistd.h>
55 #endif
56 
57 #ifdef NEED_OPTIND_DECLARATION
58 extern int optind;
59 extern char *optarg;
60 #endif
61 
/* Sizing constants for the "<namespace>_<n>" string that
 * rdfdiff_ordinal_equals_resource() formats into a local buffer.
 */
/* presumably an upper bound on the characters of a printed int - TODO confirm */
#define MAX_ASCII_INT_SIZE 13
/* presumably strlen() of the RDF namespace URI string - TODO confirm */
#define RDF_NAMESPACE_URI_LEN 43
/* namespace part + ordinal digits + 1 extra character (the '_' separator) */
#define ORDINAL_STRING_LEN (RDF_NAMESPACE_URI_LEN + MAX_ASCII_INT_SIZE + 1)

/* Short option letters handed to getopt()/getopt_long() in main() */
#define GETOPT_STRING "bhf:t:u:"
67 
#ifdef HAVE_GETOPT_LONG
/* Long option table passed to getopt_long() in main(); each entry maps
 * a long name onto the short option letter that main()'s switch handles.
 */
static const struct option long_options[] =
{
  /* name, has_arg, flag, val */
  {"brief"       , 0, 0, 'b'},
  {"help"        , 0, 0, 'h'},
  {"from-format" , 1, 0, 'f'},
  {"to-format"   , 1, 0, 't'},
  {"base-uri"    , 1, 0, 'u'},
  /* all-zero terminator entry */
  {NULL          , 0, 0, 0}
};
#endif
80 
/* Help-text building macros.  With getopt_long available the help
 * shows both the short and the long option spelling; otherwise only
 * the short one.  HELP_TEXT pastes string literals together, HELP_ARG
 * stringifies the chosen option name, HELP_PAD is a newline followed
 * by indentation spaces.
 */
#ifdef HAVE_GETOPT_LONG
#define HELP_TEXT(short, long, description) "  -" short ", --" long "  " description
#define HELP_ARG(short, long) "--" #long
#define HELP_PAD "\n                          "
#else
#define HELP_TEXT(short, long, description) "  -" short "  " description
#define HELP_ARG(short, long) "-" #short
#define HELP_PAD "\n      "
#endif
90 
/* One node of a singly linked list of statements. */
typedef struct rdfdiff_link_s {
  struct rdfdiff_link_s *next;   /* following node, NULL at the tail */
  raptor_statement *statement;   /* statement copy; released with raptor_free_statement() when the list is freed */
} rdfdiff_link;
95 
/* Everything recorded about one blank (anonymous) node of a file. */
typedef struct rdfdiff_blank_s {
  struct rdfdiff_blank_s *next;  /* next blank node of the same file */
  raptor_world *world;           /* world used when freeing the statements below */
  char *blank_id;                /* private copy of the node identifier */
  raptor_statement *owner;       /* copy of a statement having this node as its object, or NULL */
  rdfdiff_link *first;           /* head of the list of statements having this node as subject */
  rdfdiff_link *last;            /* tail of that list */
  int matched;                   /* set to 1 in main() once paired with a node of the other file */
} rdfdiff_blank;
105 
/* Per-input state: the parser plus everything collected while parsing. */
typedef struct {
  raptor_world *world;           /* world the parser and statements belong to */
  char *name;                    /* private copy of the name given to rdfdiff_new_file() */
  raptor_parser *parser;         /* parser created for the requested syntax */
  rdfdiff_link *first;           /* head of the list of statements without blank nodes */
  rdfdiff_link *last;            /* tail of that list */
  rdfdiff_blank *first_blank;    /* head of the list of blank nodes seen */
  rdfdiff_blank *last_blank;     /* tail of that list */
  int statement_count;           /* statements accepted by rdfdiff_collect_statements() */
  int error_count;               /* bumped by rdfdiff_error_handler() */
  int warning_count;             /* bumped by rdfdiff_warning_handler() */
  int difference_count;          /* bumped in main() for every difference found */
} rdfdiff_file;
119 
/* set by -b/--brief: only report whether the files differ */
static int brief = 0;
/* basename of argv[0], used as the prefix of diagnostics */
static char *program=NULL;
static const char * const title_format_string="Raptor RDF diff utility %s\n";
/* when non-zero the handlers count but do not print; never set in the
 * code visible here */
static int ignore_errors = 0;
static int ignore_warnings = 0;
/* one-shot flags: cleared once the matching section header was printed */
static int emit_from_header = 1;
static int emit_to_header = 1;

/* the two inputs being compared; created and released in main() */
static rdfdiff_file* from_file = NULL;
static rdfdiff_file*to_file = NULL;
130 
/* Forward declarations for functions used before their definition. */

/* per-file state: construction and teardown */
static rdfdiff_file* rdfdiff_new_file(raptor_world* world, const unsigned char *name, const char *syntax);
static void rdfdiff_free_file(rdfdiff_file* file);

/* blank node records: search, construction and teardown */
static rdfdiff_blank *rdfdiff_find_blank(rdfdiff_blank *first, char *blank_id);
static rdfdiff_blank *rdfdiff_new_blank(raptor_world *world, char *blank_id);
static void rdfdiff_free_blank(rdfdiff_blank *blank);

/* comparison of two blank nodes, one from each file */
static int  rdfdiff_blank_equals(const rdfdiff_blank *b1, const rdfdiff_blank *b2,
                                 rdfdiff_file*b1_file, rdfdiff_file*b2_file);

/* parser callbacks for errors and warnings */
static void rdfdiff_error_handler(void *data, raptor_locator *locator, const char *message);
static void rdfdiff_warning_handler(void *data, raptor_locator *locator, const char *message);

/* parser callback receiving each parsed statement */
static void rdfdiff_collect_statements(void *user_data, const raptor_statement *statement);

int main(int argc, char *argv[]);
147 
148 
/* NULL-tolerant strcmp(): two NULL strings compare equal and a NULL
 * string orders before any non-NULL string.
 */
static int
safe_strcmp(const char *s1, const char *s2)
{
  if(s1 && s2)
    return strcmp(s1, s2);

  /* at least one side is NULL from here on */
  if(s1)
    return 1;

  return s2 ? -1 : 0;
}
166 
167 
#ifdef RDFDIFF_DEBUG
/* Debug aid: write every statement on the file's statement list to
 * stderr, one per line, preceded by a header naming the file.
 */
static void
rdfdiff_print_statements(rdfdiff_file* file)
{
  rdfdiff_link *link;

  fprintf(stderr, "Statements in %s\n",  file->name);

  for(link = file->first; link; link = link->next) {
    raptor_print_statement(link->statement, stderr);
    fprintf(stderr, "\n");
  }
}
#endif
181 
182 
183 static rdfdiff_file*
rdfdiff_new_file(raptor_world * world,const unsigned char * name,const char * syntax)184 rdfdiff_new_file(raptor_world *world, const unsigned char *name, const char *syntax)
185 {
186   rdfdiff_file* file = (rdfdiff_file*)RAPTOR_CALLOC(rdfdiff_file, 1, sizeof(rdfdiff_file));
187   if(file) {
188     file->world = world;
189     file->name = (char*)RAPTOR_MALLOC(cstring, strlen((const char*)name)+1);
190     strcpy((char*)file->name, (const char*)name);
191 
192     file->parser = raptor_new_parser_v2(world, syntax);
193     if(file->parser) {
194       raptor_set_error_handler(file->parser, file, rdfdiff_error_handler);
195       raptor_set_warning_handler(file->parser, file, rdfdiff_warning_handler);
196     } else {
197       fprintf(stderr, "%s: Failed to create raptor parser type %s for %s\n",
198               program, syntax, name);
199       rdfdiff_free_file(file);
200       return(0);
201     }
202 
203 
204   }
205 
206   return file;
207 }
208 
209 
210 static void
rdfdiff_free_file(rdfdiff_file * file)211 rdfdiff_free_file(rdfdiff_file* file)
212 {
213   rdfdiff_link *cur, *next;
214   rdfdiff_blank *cur1, *next1;
215 
216   if(file->name)
217     RAPTOR_FREE(cstring, file->name);
218 
219   if(file->parser)
220     raptor_free_parser(file->parser);
221 
222   for(cur = file->first; cur; cur = next) {
223     next = cur->next;
224 
225     raptor_free_statement(file->world, cur->statement);
226     RAPTOR_FREE(rdfdiff_link, cur);
227   }
228 
229   for(cur1 = file->first_blank; cur1; cur1 = next1) {
230     next1 = cur1->next;
231 
232     rdfdiff_free_blank(cur1);
233   }
234 
235   RAPTOR_FREE(rdfdiff_file, file);
236 
237 }
238 
239 
240 static rdfdiff_blank *
rdfdiff_new_blank(raptor_world * world,char * blank_id)241 rdfdiff_new_blank(raptor_world* world, char *blank_id)
242 {
243   rdfdiff_blank *blank = (rdfdiff_blank *)RAPTOR_CALLOC(rdfdiff_blank, 1, sizeof(rdfdiff_blank));
244 
245   if(blank) {
246     blank->world = world;
247     blank->blank_id = (char*)RAPTOR_MALLOC(cstring, strlen(blank_id)+1);
248     strcpy((char*)blank->blank_id, (const char*)blank_id);
249   }
250 
251   return blank;
252 }
253 
254 
255 static void
rdfdiff_free_blank(rdfdiff_blank * blank)256 rdfdiff_free_blank(rdfdiff_blank *blank)
257 {
258   rdfdiff_link *cur, *next;
259 
260   if(blank->blank_id)
261     RAPTOR_FREE(cstring, blank->blank_id);
262 
263   if(blank->owner)
264     raptor_free_statement(blank->world, blank->owner);
265 
266   for(cur = blank->first; cur; cur = next) {
267     next = cur->next;
268 
269     raptor_free_statement(blank->world, cur->statement);
270     RAPTOR_FREE(rdfdiff_link, cur);
271   }
272 
273   RAPTOR_FREE(rdfdiff_blank, blank);
274 
275 }
276 
277 
278 static int
rdfdiff_ordinal_equals_resource(raptor_world * world,int ordinal,raptor_uri * resource)279 rdfdiff_ordinal_equals_resource(raptor_world* world, int ordinal, raptor_uri *resource)
280 {
281   unsigned char ordinal_string[ORDINAL_STRING_LEN + 1];
282   raptor_uri *ordinal_uri;
283   int equal;
284 
285   snprintf((char *)ordinal_string, ORDINAL_STRING_LEN, "%s_%d",
286            raptor_rdf_namespace_uri, ordinal);
287 
288   ordinal_uri = raptor_new_uri_v2(world, ordinal_string);
289 
290   equal = raptor_uri_equals_v2(world, ordinal_uri, resource);
291 
292   raptor_free_uri_v2(world, ordinal_uri);
293 
294   return equal;
295 }
296 
297 
298 static int
rdfdiff_statement_equals(raptor_world * world,const raptor_statement * s1,const raptor_statement * s2)299 rdfdiff_statement_equals(raptor_world *world, const raptor_statement *s1, const raptor_statement *s2)
300 {
301   int rv=0;
302 
303   if(!s1 || !s2)
304     return 0;
305 
306 #if RAPTOR_DEBUG > 2
307   fprintf(stderr, "(rdfdiff_statement_equals) Comparing ");
308   raptor_print_statement(s1, stderr);
309   fprintf(stderr, " to ");
310   raptor_print_statement(s2, stderr);
311 #endif
312 
313   if(s1->subject_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL &&
314      s2->subject_type == RAPTOR_IDENTIFIER_TYPE_RESOURCE) {
315 
316     /* check for ordinal/resource equivalence */
317     if(!rdfdiff_ordinal_equals_resource(world,
318                                         *(int *)s1->subject,
319                                         (raptor_uri *)s2->subject)) {
320       rv=0;
321       goto done;
322     }
323 
324   } else if(s1->subject_type == RAPTOR_IDENTIFIER_TYPE_RESOURCE &&
325             s2->subject_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
326 
327     /* check for ordinal/resource equivalence */
328     if(!rdfdiff_ordinal_equals_resource(world,
329                                         *(int *)s2->subject,
330                                         (raptor_uri *)s1->subject)) {
331       rv=0;
332       goto done;
333     }
334 
335   } else {
336     /* normal comparison */
337     if(s1->subject_type != s2->subject_type) {
338       rv=0;
339       goto done;
340     }
341 
342     if(s1->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
343       /* Here for completeness. Anonymous nodes are taken care of
344        * elsewhere */
345       /*if(strcmp((const char *)s1->subject, (const char *)s2->subject) != 0)
346         return 0;*/
347     } else {
348       if(!raptor_uri_equals_v2(world,
349                                (raptor_uri *)s1->subject,
350                                (raptor_uri *)s2->subject)) {
351         rv=0;
352         goto done;
353       }
354     }
355   }
356 
357   if(s1->predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL &&
358      s2->predicate_type == RAPTOR_IDENTIFIER_TYPE_PREDICATE) {
359 
360     /* check for ordinal/resource equivalence */
361     if(!rdfdiff_ordinal_equals_resource(world,
362                                         *(int *)s1->predicate,
363                                         (raptor_uri *)s2->predicate)) {
364       rv=0;
365       goto done;
366     }
367 
368   } else if(s1->predicate_type == RAPTOR_IDENTIFIER_TYPE_PREDICATE &&
369             s2->predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
370 
371     /* check for ordinal/resource equivalence */
372     if(!rdfdiff_ordinal_equals_resource(world,
373                                         *(int *)s2->predicate,
374                                         (raptor_uri *)s1->predicate)) {
375       rv=0;
376       goto done;
377     }
378 
379   } else {
380 
381     if(s1->predicate_type != s2->predicate_type) {
382       rv=0;
383       goto done;
384     }
385 
386     if(s1->predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
387       if(*(int *)s1->predicate != *(int *)s2->predicate) {
388         rv=0;
389         goto done;
390       }
391     } else {
392       if(!raptor_uri_equals_v2(world,
393                                (raptor_uri *)s1->predicate,
394                                (raptor_uri *)s2->predicate)) {
395         rv=0;
396         goto done;
397       }
398     }
399   }
400 
401   if(s1->object_type != s2->object_type) {
402     rv=0;
403     goto done;
404   }
405 
406   if(s1->object_type == RAPTOR_IDENTIFIER_TYPE_LITERAL ||
407      s1->object_type == RAPTOR_IDENTIFIER_TYPE_XML_LITERAL) {
408     int equal;
409 
410     equal=!safe_strcmp((char *)s1->object, (char *)s2->object);
411 
412     if(equal) {
413       if(s1->object_literal_language && s2->object_literal_language)
414         equal=!strcmp((char *)s1->object_literal_language,
415                       (char *)s2->object_literal_language);
416       else if(s1->object_literal_language || s2->object_literal_language)
417         equal=0;
418       else
419         equal=1;
420 
421       if(equal)
422         equal=raptor_uri_equals_v2(world,
423                                    s1->object_literal_datatype,
424                                    s2->object_literal_datatype);
425     }
426 
427     rv=equal;
428     goto done;
429   } else if(s1->object_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
430     /* Here for completeness. Anonymous nodes are taken care of
431      * elsewhere */
432     /* if(strcmp((const char *)s1->object, (const char *)s2->object) != 0)
433        return 0; */
434   } else if(s1->object_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
435     if(*(int *)s1->object != *(int *)s2->object) {
436       rv=0;
437       goto done;
438     }
439   } else {
440     if(!raptor_uri_equals_v2(world, (raptor_uri *)s1->object, (raptor_uri *)s2->object))
441       rv=0;
442   }
443 
444   rv=1;
445   done:
446 
447 #if RAPTOR_DEBUG > 2
448   fprintf(stderr, " : %s\n", (rv ? "equal" : "not equal"));
449 #endif
450   return rv;
451 }
452 
453 
454 static int
rdfdiff_blank_equals(const rdfdiff_blank * b1,const rdfdiff_blank * b2,rdfdiff_file * b1_file,rdfdiff_file * b2_file)455 rdfdiff_blank_equals(const rdfdiff_blank *b1, const rdfdiff_blank *b2,
456                      rdfdiff_file *b1_file, rdfdiff_file *b2_file)
457 {
458   /* first compare "owners". Owners are subject/predicate or arcs
459    * in. */
460   int equal = 0;
461 
462   if(b1->owner == NULL && b2->owner == NULL) {
463     /* Both are "top-level" anonymous objects. I.E. Neither is the
464      * object of a statement. Fall through and compare based on their
465      * contents. */
466     equal = 1;
467   } else if(b1->owner == NULL || b2->owner == NULL) {
468     equal = 0;
469   } else if(b1->owner->subject_type != RAPTOR_IDENTIFIER_TYPE_ANONYMOUS &&
470             b2->owner->subject_type != RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
471     /* Neither are anonymous. Normal comparison. This will return
472      * false if both the subject and the predicates don't match. We
473      * know the objects are blank nodes. */
474     equal = rdfdiff_statement_equals(b1->world, b1->owner, b2->owner);
475 
476   } else if(b1->owner->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS &&
477             b2->owner->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
478     rdfdiff_blank *p1;
479     rdfdiff_blank *p2;
480 
481     /* Both are anonymous.  Need further testing. Check that the
482      * containing anononymous nodes are eaual. */
483 #if 0
484     fprintf(stderr, "b1->owner: ");
485     raptor_print_statement(b1->owner, stderr);
486     fprintf(stderr, "\n");
487 
488     fprintf(stderr, "b2->owner: ");
489     raptor_print_statement(b2->owner, stderr);
490     fprintf(stderr, "\n");
491 #endif
492     p1 = rdfdiff_find_blank(b1_file->first_blank, (char *)b1->owner->subject);
493     p2 = rdfdiff_find_blank(b2_file->first_blank, (char *)b2->owner->subject);
494     equal = rdfdiff_blank_equals(p1, p2, b1_file, b2_file);
495   } else {
496     equal = 0;
497   }
498 
499   /* Now compare the contents. This accounts for the case where a
500    * subject has several properties (of the same predicate value) with
501    * different blank nodes as values. */
502   if(equal) {
503     rdfdiff_link *s1 = b1->first;
504     while (s1) {
505 
506       rdfdiff_link *s2 = b2->first;
507       while (s2) {
508 
509         if(rdfdiff_statement_equals(b1->world, s1->statement, s2->statement))
510           break;
511 
512         s2 = s2->next;
513 
514       }
515 
516       if(s2 == 0) {
517         equal = 0;
518         break;
519       }
520 
521       s1 = s1->next;
522 
523     }
524 
525   }
526 
527   return equal;
528 }
529 
530 
531 static void
rdfdiff_error_handler(void * data,raptor_locator * locator,const char * message)532 rdfdiff_error_handler(void *data, raptor_locator *locator,
533                       const char *message)
534 {
535   rdfdiff_file* file = (rdfdiff_file*)data;
536 
537   if(!ignore_errors) {
538     fprintf(stderr, "%s: Error - ", program);
539     raptor_print_locator_v2(file->world, stderr, locator);
540     fprintf(stderr, " - %s\n", message);
541 
542     raptor_parse_abort(file->parser);
543   }
544 
545   file->error_count++;
546 
547 }
548 
549 
550 static void
rdfdiff_warning_handler(void * data,raptor_locator * locator,const char * message)551 rdfdiff_warning_handler(void *data, raptor_locator *locator,
552                         const char *message)
553 {
554   rdfdiff_file* file = (rdfdiff_file*)data;
555 
556   if(!ignore_warnings) {
557     fprintf(stderr, "%s: Warning - ", program);
558     raptor_print_locator_v2(file->world, stderr, locator);
559     fprintf(stderr, " - %s\n", message);
560   }
561 
562   file->warning_count++;
563 
564 }
565 
566 
567 static rdfdiff_blank *
rdfdiff_find_blank(rdfdiff_blank * first,char * blank_id)568 rdfdiff_find_blank(rdfdiff_blank *first, char *blank_id)
569 {
570   rdfdiff_blank *rv_blank = 0;
571   rdfdiff_blank *cur = first;
572 
573   while (cur) {
574 
575     if(strcmp(cur->blank_id, blank_id) == 0) {
576       rv_blank = cur;
577       break;
578     }
579 
580     cur = cur->next;
581 
582   }
583 
584   return rv_blank;
585 
586 }
587 
588 
589 static rdfdiff_blank *
rdfdiff_lookup_blank(rdfdiff_file * file,char * blank_id)590 rdfdiff_lookup_blank(rdfdiff_file* file, char *blank_id)
591 {
592   rdfdiff_blank *rv_blank = rdfdiff_find_blank(file->first_blank, blank_id);
593 
594   if(rv_blank == NULL) {
595     rv_blank = rdfdiff_new_blank(file->world, blank_id);
596     if(rv_blank) {
597 
598       if(!file->first_blank) {
599         file->first_blank = rv_blank;
600         file->last_blank = rv_blank;
601       } else {
602         file->last_blank->next = rv_blank;
603         file->last_blank = rv_blank;
604       }
605     }
606   }
607 
608   return rv_blank;
609 
610 }
611 
612 
613 static int
rdfdiff_add_blank_statement(rdfdiff_file * file,const raptor_statement * statement)614 rdfdiff_add_blank_statement(rdfdiff_file* file,
615                             const raptor_statement *statement)
616 {
617   rdfdiff_blank *blank;
618   rdfdiff_link *dlink;
619 
620   blank = rdfdiff_lookup_blank(file, (char *)statement->subject);
621   if(!blank)
622     goto failed;
623 
624   dlink = (rdfdiff_link *)RAPTOR_MALLOC(rdfdiff_link, sizeof(rdfdiff_link));
625   if(!dlink)
626     goto failed;
627 
628   dlink->statement = raptor_statement_copy(file->world, statement);
629   if(!dlink->statement) {
630     RAPTOR_FREE(rdfdiff_link, dlink);
631     goto failed;
632   }
633 
634   dlink->next = NULL;
635   if(!blank->first) {
636     blank->first = dlink;
637     blank->last = dlink;
638   } else {
639     blank->last->next = dlink;
640     blank->last = dlink;
641   }
642 
643   return 0;
644 
645 failed:
646   fprintf(stderr, "%s: Internal Error\n", program);
647   return 1;
648 }
649 
650 
651 static int
rdfdiff_add_blank_statement_owner(rdfdiff_file * file,const raptor_statement * statement)652 rdfdiff_add_blank_statement_owner(rdfdiff_file* file,
653                                   const raptor_statement *statement)
654 {
655   rdfdiff_blank *blank;
656 
657   blank = rdfdiff_lookup_blank(file, (char *)statement->object);
658   if(!blank)
659     goto failed;
660 
661   blank->owner = raptor_statement_copy(file->world, statement);
662   if(!blank->owner)
663     goto failed;
664 
665   return 0;
666 
667 failed:
668   fprintf(stderr, "%s: Internal Error\n", program);
669   return 1;
670 }
671 
672 
673 static int
rdfdiff_add_statement(rdfdiff_file * file,const raptor_statement * statement)674 rdfdiff_add_statement(rdfdiff_file* file, const raptor_statement *statement)
675 {
676   int rv = 0;
677 
678   rdfdiff_link *dlink = (rdfdiff_link *)RAPTOR_MALLOC(rdfdiff_link, sizeof(rdfdiff_link));
679 
680   if(dlink) {
681 
682     dlink->statement = raptor_statement_copy(file->world, statement);
683 
684     if(dlink->statement) {
685 
686       dlink->next = NULL;
687 
688       if(!file->first) {
689         file->first = dlink;
690         file->last = dlink;
691       } else {
692         file->last->next = dlink;
693         file->last = dlink;
694       }
695 
696     } else {
697       RAPTOR_FREE(rdfdiff_link, dlink);
698       rv = 1;
699     }
700 
701   } else {
702     rv = 1;
703   }
704 
705   if(rv != 0)
706     fprintf(stderr, "%s: Internal Error\n", program);
707 
708   return rv;
709 
710 }
711 
712 
713 static rdfdiff_link*
rdfdiff_statement_find(rdfdiff_file * file,const raptor_statement * statement,rdfdiff_link ** prev_p)714 rdfdiff_statement_find(rdfdiff_file* file, const raptor_statement *statement,
715                        rdfdiff_link** prev_p)
716 {
717   rdfdiff_link* prev = NULL;
718   rdfdiff_link* cur = file->first;
719 
720   while(cur) {
721     if(rdfdiff_statement_equals(file->world, cur->statement, statement)) {
722       if(prev_p)
723         *prev_p=prev;
724       return cur;
725     }
726     prev=cur;
727     cur=cur->next;
728   }
729 
730   return NULL;
731 }
732 
733 
734 static int
rdfdiff_statement_exists(rdfdiff_file * file,const raptor_statement * statement)735 rdfdiff_statement_exists(rdfdiff_file* file, const raptor_statement *statement)
736 {
737   rdfdiff_link* node;
738   rdfdiff_link* prev=NULL;
739   node=rdfdiff_statement_find(file, statement, &prev);
740   return (node != NULL);
741 }
742 
743 
744 /*
745  * rdfdiff_collect_statements - Called when parsing "from" file to build a
746  * list of statements for comparison with those in the "to" file.
747  */
748 static void
rdfdiff_collect_statements(void * user_data,const raptor_statement * statement)749 rdfdiff_collect_statements(void *user_data, const raptor_statement *statement)
750 {
751   int rv = 0;
752   rdfdiff_file* file = (rdfdiff_file*)user_data;
753 
754   if(rdfdiff_statement_exists(file, statement))
755     return;
756 
757   file->statement_count++;
758 
759   if(statement->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS ||
760       statement->object_type  == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
761 
762     if(statement->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS)
763       rv = rdfdiff_add_blank_statement(file, statement);
764 
765     if(rv == 0 && statement->object_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS)
766       rv = rdfdiff_add_blank_statement_owner(file, statement);
767 
768   } else {
769     rv = rdfdiff_add_statement(file, statement);
770   }
771 
772   if(rv != 0) {
773     raptor_parse_abort(file->parser);
774   }
775 
776 }
777 
778 
779 
780 int
main(int argc,char * argv[])781 main(int argc, char *argv[])
782 {
783   raptor_world *world = NULL;
784   unsigned char *from_string=NULL;
785   unsigned char *to_string=NULL;
786   raptor_uri *from_uri=NULL;
787   raptor_uri *to_uri=NULL;
788   raptor_uri *base_uri=NULL;
789   const char *from_syntax = "rdfxml";
790   const char *to_syntax = "rdfxml";
791   int free_from_string = 0;
792   int free_to_string = 0;
793   int usage=0;
794   int help=0;
795   char *p;
796   int rv = 0;
797   rdfdiff_blank *b1;
798   rdfdiff_link *cur;
799 
800   program=argv[0];
801   if((p=strrchr(program, '/')))
802     program=p+1;
803   else if((p=strrchr(program, '\\')))
804     program=p+1;
805   argv[0]=program;
806 
807   world = raptor_new_world();
808   if(!world)
809     exit(1);
810   rv = raptor_world_open(world);
811   if(rv)
812     exit(1);
813 
814   while (!usage && !help)
815   {
816     int c;
817 #ifdef HAVE_GETOPT_LONG
818     int option_index = 0;
819 
820     c = getopt_long (argc, argv, GETOPT_STRING, long_options, &option_index);
821 #else
822     c = getopt (argc, argv, GETOPT_STRING);
823 #endif
824     if(c == -1)
825       break;
826 
827     switch (c) {
828       case 0:
829       case '?': /* getopt() - unknown option */
830         usage=1;
831         break;
832 
833       case 'b':
834         brief = 1;
835         break;
836 
837       case 'h':
838         help=1;
839         break;
840 
841       case 'f':
842         if(optarg)
843           from_syntax = optarg;
844         break;
845 
846       case 't':
847         if(optarg)
848           to_syntax = optarg;
849         break;
850 
851       case 'u':
852         if(optarg)
853           base_uri = raptor_new_uri_v2(world, (const unsigned char*)optarg);
854         break;
855 
856     }
857 
858   }
859 
860   if(optind != argc-2)
861     help = 1;
862 
863   if(usage) {
864     if(usage>1) {
865       fprintf(stderr, title_format_string, raptor_version_string);
866       fputs(raptor_short_copyright_string, stderr);
867       fputc('\n', stderr);
868     }
869     fprintf(stderr, "Try `%s " HELP_ARG(h, help) "' for more information.\n",
870                     program);
871     rv = 1;
872     goto exit;
873   }
874 
875   if(help) {
876     printf("Usage: %s [OPTIONS] <from URI> <to URI>\n", program);
877     printf(title_format_string, raptor_version_string);
878     puts(raptor_short_copyright_string);
879     puts("Find differences between two RDF files.");
880     puts("\nOPTIONS:");
881     puts(HELP_TEXT("h", "help                      ", "Print this help, then exit"));
882     puts(HELP_TEXT("b", "brief                     ", "Report only whether files differ"));
883     puts(HELP_TEXT("u BASE-URI", "base-uri BASE-URI  ", "Set the base URI for the files"));
884     puts(HELP_TEXT("f FORMAT",   "from-format FORMAT ", "Format of <from URI> (default is rdfxml)"));
885     puts(HELP_TEXT("t FORMAT",   "to-format FORMAT   ", "Format of <to URI> (default is rdfxml)"));
886     rv = 1;
887     goto exit;
888   }
889 
890   from_string = (unsigned char *)argv[optind++];
891   to_string = (unsigned char *)argv[optind];
892 
893   if(!access((const char *)from_string, R_OK)) {
894     char *filename = (char *)from_string;
895     from_string = raptor_uri_filename_to_uri_string(filename);
896     if(!from_string) {
897       fprintf(stderr, "%s: Failed to create URI for file %s.\n", program, filename);
898       rv = 2;
899       goto exit;
900     }
901     free_from_string = 1;
902   }
903 
904   if(!access((const char *)to_string, R_OK)) {
905     char *filename = (char *)to_string;
906     to_string = raptor_uri_filename_to_uri_string(filename);
907     if(!to_string) {
908       fprintf(stderr, "%s: Failed to create URI for file %s.\n", program, filename);
909       rv = 2;
910       goto exit;
911     }
912     free_to_string = 1;
913   }
914 
915   if(from_string) {
916     from_uri = raptor_new_uri_v2(world, from_string);
917     if(!from_uri) {
918       fprintf(stderr, "%s: Failed to create URI for %s\n", program, from_string);
919       rv = 2;
920       goto exit;
921     }
922   }
923 
924   if(to_string) {
925     to_uri = raptor_new_uri_v2(world, to_string);
926     if(!to_uri) {
927       fprintf(stderr, "%s: Failed to create URI for %s\n", program, from_string);
928       rv = 2;
929       goto exit;
930     }
931   }
932 
933   /* create and init "from" data structures */
934   from_file = rdfdiff_new_file(world, from_string, from_syntax);
935   if(!from_file) {
936     rv = 2;
937     goto exit;
938   }
939 
940   /* create and init "to" data structures */
941   to_file = rdfdiff_new_file(world, to_string, to_syntax);
942   if(!to_file) {
943     rv = 2;
944     goto exit;
945   }
946 
947   /* parse the files */
948   raptor_set_statement_handler(from_file->parser, from_file,
949                                rdfdiff_collect_statements);
950 
951   if(raptor_parse_uri(from_file->parser, from_uri, base_uri)) {
952     fprintf(stderr, "%s: Failed to parse URI %s as %s content\n", program,
953             from_string, from_syntax);
954     rv = 1;
955     goto exit;
956   } else {
957 
958     /* Note intentional from_uri as base_uri */
959     raptor_set_statement_handler(to_file->parser, to_file,
960                                  rdfdiff_collect_statements);
961     if(raptor_parse_uri(to_file->parser, to_uri, base_uri ? base_uri: from_uri)) {
962       fprintf(stderr, "%s: Failed to parse URI %s as %s content\n", program,
963               to_string, to_syntax);
964       rv = 1;
965       goto exit;
966     }
967   }
968 
969 
970   /* Compare triples with no blank nodes */
971   cur = to_file->first;
972   while(cur) {
973     rdfdiff_link* node;
974     rdfdiff_link* prev;
975     node=rdfdiff_statement_find(from_file, cur->statement, &prev);
976     if(node) {
977       /* exists in from file - remove it from the list */
978       if(from_file->first == node) {
979         from_file->first = node->next;
980       } else {
981         prev->next = node->next;
982       }
983       raptor_free_statement(world, node->statement);
984       RAPTOR_FREE(rdfdiff_link, node);
985     } else {
986       if(!brief) {
987         if(emit_from_header) {
988           fprintf(stderr, "Statements in %s but not in %s\n",
989                   to_file->name, from_file->name);
990           emit_from_header = 0;
991         }
992 
993         fprintf(stderr, "<    ");
994         raptor_print_statement_v1(world, cur->statement, stderr);
995         fprintf(stderr, "\n");
996       }
997 
998       to_file->difference_count++;
999     }
1000     cur=cur->next;
1001   }
1002 
1003 
1004   /* Now compare the blank nodes */
1005   b1 = to_file->first_blank;
1006   while (b1) {
1007 
1008     rdfdiff_blank *b2 = from_file->first_blank;
1009 
1010     while (b2) {
1011 
1012       if(!b2->matched && rdfdiff_blank_equals(b1, b2, to_file, from_file)) {
1013         b1->matched = 1;
1014         b2->matched = 1;
1015         break;
1016       }
1017 
1018       b2 = b2->next;
1019 
1020     }
1021 
1022     if(b2 == 0) {
1023       if(!brief) {
1024 #if 0
1025         fprintf(stderr, "<    ");
1026         raptor_print_statement(b1->owner, stderr);
1027         fprintf(stderr, "\n");
1028 #else
1029         if(emit_from_header) {
1030           fprintf(stderr, "Statements in %s but not in %s\n",  to_file->name, from_file->name);
1031           emit_from_header = 0;
1032         }
1033 
1034         fprintf(stderr, "<    anonymous node %s\n", b1->blank_id);
1035 #endif
1036       }
1037 
1038       to_file->difference_count++;
1039     }
1040 
1041     b1 = b1->next;
1042 
1043   }
1044 
1045   if(from_file->first) {
1046     /* The entrys left in from_file have not been found in to_file. */
1047     if(!brief) {
1048 
1049       if(emit_to_header) {
1050         fprintf(stderr, "Statements in %s but not in %s\n",  from_file->name,
1051                 to_file->name);
1052         emit_to_header = 0;
1053       }
1054 
1055       cur = from_file->first;
1056       while (cur) {
1057         if(!brief) {
1058           fprintf(stderr, ">    ");
1059           raptor_print_statement_v1(world, cur->statement, stderr);
1060           fprintf(stderr, "\n");
1061         }
1062 
1063         cur = cur->next;
1064         from_file->difference_count++;
1065       }
1066     }
1067 
1068   }
1069 
1070   if(from_file->first_blank) {
1071     rdfdiff_blank *blank = from_file->first_blank;
1072     while (blank) {
1073 
1074       if(!blank->matched) {
1075         if(!brief) {
1076 #if 0
1077           fprintf(stderr, ">    ");
1078           raptor_print_statement(blank->owner, stderr);
1079           fprintf(stderr, "\n");
1080 #else
1081           if(emit_to_header) {
1082             fprintf(stderr, "Statements in %s but not in %s\n",  from_file->name, to_file->name);
1083             emit_to_header = 0;
1084           }
1085           fprintf(stderr, ">    anonymous node %s\n", blank->blank_id);
1086 #endif
1087         }
1088         from_file->difference_count++;
1089       }
1090 
1091       blank = blank->next;
1092 
1093     }
1094 
1095   }
1096 
1097   if(!(from_file->difference_count == 0 &&
1098         to_file->difference_count == 0)) {
1099 
1100     if(brief)
1101       fprintf(stderr, "Files differ\n");
1102 
1103     rv = 1;
1104   }
1105 
1106 exit:
1107 
1108   if(base_uri)
1109     raptor_free_uri_v2(world, base_uri);
1110 
1111   if(from_file)
1112     rdfdiff_free_file(from_file);
1113 
1114   if(to_file)
1115     rdfdiff_free_file(to_file);
1116 
1117   if(free_from_string)
1118     raptor_free_memory(from_string);
1119 
1120   if(free_to_string)
1121     raptor_free_memory(to_string);
1122 
1123   if(from_uri)
1124     raptor_free_uri_v2(world, from_uri);
1125 
1126   if(to_uri)
1127     raptor_free_uri_v2(world, to_uri);
1128 
1129   raptor_free_world(world);
1130 
1131   return rv;
1132 
1133 }
1134 
1135