1 /* -*- Mode: c; c-basic-offset: 2 -*-
2 *
3 * rdfdiff.c - Raptor RDF diff tool
4 *
5 * Copyright (C) 2000-2008, David Beckett http://www.dajobe.org/
6 * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
7 * Copyright (C) 2005, Steve Shepard steveshep@gmail.com
8 *
9 * This package is Free Software and part of Redland http://librdf.org/
10 *
11 * It is licensed under the following three licenses as alternatives:
12 * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
13 * 2. GNU General Public License (GPL) V2 or any newer version
14 * 3. Apache License, V2.0 or any newer version
15 *
16 * You may not use this file except in compliance with at least one of
17 * the above three licenses.
18 *
19 * See LICENSE.html or LICENSE.txt at the top of this package for the
20 * complete terms and further detail along with the license texts for
21 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
22 *
23 *
24 */
25
26
27 #ifdef HAVE_CONFIG_H
28 #include <raptor_config.h>
29 #endif
30
31 #ifdef WIN32
32 #include <win32_raptor_config.h>
33 #endif
34
35 #include <stdio.h>
36 #include <string.h>
37
38 /* Raptor includes */
39 #include <raptor.h>
40 #include <raptor_internal.h>
41
42 /* for access() and R_OK */
43 #ifdef HAVE_STDLIB_H
44 #include <stdlib.h>
45 #endif
46
47 /* many places for getopt */
48 #ifdef HAVE_GETOPT_H
49 #include <getopt.h>
50 #else
51 #include <raptor_getopt.h>
52 #endif
53 #ifdef HAVE_UNISTD_H
54 #include <unistd.h>
55 #endif
56
57 #ifdef NEED_OPTIND_DECLARATION
58 extern int optind;
59 extern char *optarg;
60 #endif
61
/* Room reserved for the decimal text of an ordinal when it is formatted
 * with "%d" in rdfdiff_ordinal_equals_resource(). */
#define MAX_ASCII_INT_SIZE 13
/* Room reserved for the RDF namespace URI prefix of an ordinal URI.
 * NOTE(review): presumably the length of raptor_rdf_namespace_uri -
 * confirm against the raptor headers. */
#define RDF_NAMESPACE_URI_LEN 43
/* Size used for the "<namespace>_<ordinal>" string built when an ordinal
 * is compared against a resource URI. */
#define ORDINAL_STRING_LEN (RDF_NAMESPACE_URI_LEN + MAX_ASCII_INT_SIZE + 1)

/* Short options handed to getopt()/getopt_long(); a trailing ':' marks an
 * option that takes an argument. */
#define GETOPT_STRING "bhf:t:u:"
67
#ifdef HAVE_GETOPT_LONG
/* Long option table passed to getopt_long() in main().  Every entry
 * returns the short option letter in its val field, so long and short
 * forms share the same switch cases.  The all-zero entry ends the table. */
static const struct option long_options[] =
{
  /* name, has_arg, flag, val */
  {"brief" , 0, 0, 'b'},
  {"help" , 0, 0, 'h'},
  {"from-format" , 1, 0, 'f'},
  {"to-format" , 1, 0, 't'},
  {"base-uri" , 1, 0, 'u'},
  {NULL , 0, 0, 0}
};
#endif
80
/* Helpers for building the usage/help strings by literal concatenation.
 * With getopt_long() both the short and long spelling are shown;
 * otherwise only the short one is. */
#ifdef HAVE_GETOPT_LONG
/* One help line: short option, long option, then the description */
#define HELP_TEXT(short, long, description) " -" short ", --" long " " description
/* Spelling of an option for use inside a message (stringifies `long`) */
#define HELP_ARG(short, long) "--" #long
/* Line break plus padding for continuing a description */
#define HELP_PAD "\n "
#else
/* One help line: short option then the description (long name unused) */
#define HELP_TEXT(short, long, description) " -" short " " description
/* Spelling of an option for use inside a message (stringifies `short`) */
#define HELP_ARG(short, long) "-" #short
/* Line break plus padding for continuing a description */
#define HELP_PAD "\n "
#endif
90
/* Node of a singly linked list of statements. */
typedef struct rdfdiff_link_s {
  struct rdfdiff_link_s *next;  /* following node, or NULL at the tail */
  raptor_statement *statement;  /* copy made with raptor_statement_copy();
                                 * released with raptor_free_statement() */
} rdfdiff_link;
95
/* Everything recorded about one blank (anonymous) node of a file. */
typedef struct rdfdiff_blank_s {
  struct rdfdiff_blank_s *next; /* next blank node of the same file */
  raptor_world *world;          /* world used when freeing statements */
  char *blank_id;               /* heap copy of the node identifier */
  raptor_statement *owner;      /* copy of a statement whose object is this
                                 * node, or NULL if none was seen */
  rdfdiff_link *first;          /* statements having this node as subject */
  rdfdiff_link *last;           /* tail of that list, for appending */
  int matched;                  /* set in main() once the node has been
                                 * paired with a node of the other file */
} rdfdiff_blank;
105
/* Per-input state: the parser plus everything collected while parsing. */
typedef struct {
  raptor_world *world;          /* world the parser and statements belong to */
  char *name;                   /* heap copy of the name given at creation */
  raptor_parser *parser;        /* parser created for the requested syntax */
  rdfdiff_link *first;          /* statements without blank nodes */
  rdfdiff_link *last;           /* tail of that list, for appending */
  rdfdiff_blank *first_blank;   /* blank nodes seen in this file */
  rdfdiff_blank *last_blank;    /* tail of that list, for appending */
  int statement_count;          /* statements accepted by
                                 * rdfdiff_collect_statements() */
  int error_count;              /* calls to the error handler */
  int warning_count;            /* calls to the warning handler */
  int difference_count;         /* differences attributed to this file
                                 * by main() */
} rdfdiff_file;
119
/* Set by -b/--brief: suppress per-statement output and only report
 * whether the files differ. */
static int brief = 0;
/* Program name (argv[0] with any directory part removed) for messages */
static char *program=NULL;
static const char * const title_format_string="Raptor RDF diff utility %s\n";
/* When non-zero the error handler only counts errors: it neither prints
 * them nor aborts the parse.  Not changed anywhere in the visible code. */
static int ignore_errors = 0;
/* When non-zero the warning handler only counts warnings.  Not changed
 * anywhere in the visible code. */
static int ignore_warnings = 0;
/* Non-zero until main() has printed the heading for differences found
 * while walking the "to" file. */
static int emit_from_header = 1;
/* Non-zero until main() has printed the heading for differences found
 * while walking the "from" file. */
static int emit_to_header = 1;

/* The two inputs being compared; created and destroyed by main() */
static rdfdiff_file* from_file = NULL;
static rdfdiff_file*to_file = NULL;
130
131 static rdfdiff_file* rdfdiff_new_file(raptor_world* world, const unsigned char *name, const char *syntax);
132 static void rdfdiff_free_file(rdfdiff_file* file);
133
134 static rdfdiff_blank *rdfdiff_find_blank(rdfdiff_blank *first, char *blank_id);
135 static rdfdiff_blank *rdfdiff_new_blank(raptor_world *world, char *blank_id);
136 static void rdfdiff_free_blank(rdfdiff_blank *blank);
137
138 static int rdfdiff_blank_equals(const rdfdiff_blank *b1, const rdfdiff_blank *b2,
139 rdfdiff_file*b1_file, rdfdiff_file*b2_file);
140
141 static void rdfdiff_error_handler(void *data, raptor_locator *locator, const char *message);
142 static void rdfdiff_warning_handler(void *data, raptor_locator *locator, const char *message);
143
144 static void rdfdiff_collect_statements(void *user_data, const raptor_statement *statement);
145
146 int main(int argc, char *argv[]);
147
148
/* NULL-tolerant strcmp(): two NULLs compare equal and a NULL string
 * sorts before any non-NULL string.  Returns 0, -1 or 1 when a NULL is
 * involved, otherwise whatever strcmp() returns.
 */
static int
safe_strcmp(const char *s1, const char *s2)
{
  if(s1 && s2)
    return strcmp(s1, s2);

  /* At least one side is missing: order by presence alone */
  return (s1 != NULL) - (s2 != NULL);
}
166
167
168 #ifdef RDFDIFF_DEBUG
169 static void
rdfdiff_print_statements(rdfdiff_file * file)170 rdfdiff_print_statements(rdfdiff_file* file)
171 {
172 fprintf(stderr, "Statements in %s\n", file->name);
173 rdfdiff_link *cur = file->first;
174 while (cur) {
175 raptor_print_statement(cur->statement, stderr);
176 fprintf(stderr, "\n");
177 cur = cur->next;
178 }
179 }
180 #endif
181
182
183 static rdfdiff_file*
rdfdiff_new_file(raptor_world * world,const unsigned char * name,const char * syntax)184 rdfdiff_new_file(raptor_world *world, const unsigned char *name, const char *syntax)
185 {
186 rdfdiff_file* file = (rdfdiff_file*)RAPTOR_CALLOC(rdfdiff_file, 1, sizeof(rdfdiff_file));
187 if(file) {
188 file->world = world;
189 file->name = (char*)RAPTOR_MALLOC(cstring, strlen((const char*)name)+1);
190 strcpy((char*)file->name, (const char*)name);
191
192 file->parser = raptor_new_parser_v2(world, syntax);
193 if(file->parser) {
194 raptor_set_error_handler(file->parser, file, rdfdiff_error_handler);
195 raptor_set_warning_handler(file->parser, file, rdfdiff_warning_handler);
196 } else {
197 fprintf(stderr, "%s: Failed to create raptor parser type %s for %s\n",
198 program, syntax, name);
199 rdfdiff_free_file(file);
200 return(0);
201 }
202
203
204 }
205
206 return file;
207 }
208
209
210 static void
rdfdiff_free_file(rdfdiff_file * file)211 rdfdiff_free_file(rdfdiff_file* file)
212 {
213 rdfdiff_link *cur, *next;
214 rdfdiff_blank *cur1, *next1;
215
216 if(file->name)
217 RAPTOR_FREE(cstring, file->name);
218
219 if(file->parser)
220 raptor_free_parser(file->parser);
221
222 for(cur = file->first; cur; cur = next) {
223 next = cur->next;
224
225 raptor_free_statement(file->world, cur->statement);
226 RAPTOR_FREE(rdfdiff_link, cur);
227 }
228
229 for(cur1 = file->first_blank; cur1; cur1 = next1) {
230 next1 = cur1->next;
231
232 rdfdiff_free_blank(cur1);
233 }
234
235 RAPTOR_FREE(rdfdiff_file, file);
236
237 }
238
239
240 static rdfdiff_blank *
rdfdiff_new_blank(raptor_world * world,char * blank_id)241 rdfdiff_new_blank(raptor_world* world, char *blank_id)
242 {
243 rdfdiff_blank *blank = (rdfdiff_blank *)RAPTOR_CALLOC(rdfdiff_blank, 1, sizeof(rdfdiff_blank));
244
245 if(blank) {
246 blank->world = world;
247 blank->blank_id = (char*)RAPTOR_MALLOC(cstring, strlen(blank_id)+1);
248 strcpy((char*)blank->blank_id, (const char*)blank_id);
249 }
250
251 return blank;
252 }
253
254
255 static void
rdfdiff_free_blank(rdfdiff_blank * blank)256 rdfdiff_free_blank(rdfdiff_blank *blank)
257 {
258 rdfdiff_link *cur, *next;
259
260 if(blank->blank_id)
261 RAPTOR_FREE(cstring, blank->blank_id);
262
263 if(blank->owner)
264 raptor_free_statement(blank->world, blank->owner);
265
266 for(cur = blank->first; cur; cur = next) {
267 next = cur->next;
268
269 raptor_free_statement(blank->world, cur->statement);
270 RAPTOR_FREE(rdfdiff_link, cur);
271 }
272
273 RAPTOR_FREE(rdfdiff_blank, blank);
274
275 }
276
277
278 static int
rdfdiff_ordinal_equals_resource(raptor_world * world,int ordinal,raptor_uri * resource)279 rdfdiff_ordinal_equals_resource(raptor_world* world, int ordinal, raptor_uri *resource)
280 {
281 unsigned char ordinal_string[ORDINAL_STRING_LEN + 1];
282 raptor_uri *ordinal_uri;
283 int equal;
284
285 snprintf((char *)ordinal_string, ORDINAL_STRING_LEN, "%s_%d",
286 raptor_rdf_namespace_uri, ordinal);
287
288 ordinal_uri = raptor_new_uri_v2(world, ordinal_string);
289
290 equal = raptor_uri_equals_v2(world, ordinal_uri, resource);
291
292 raptor_free_uri_v2(world, ordinal_uri);
293
294 return equal;
295 }
296
297
298 static int
rdfdiff_statement_equals(raptor_world * world,const raptor_statement * s1,const raptor_statement * s2)299 rdfdiff_statement_equals(raptor_world *world, const raptor_statement *s1, const raptor_statement *s2)
300 {
301 int rv=0;
302
303 if(!s1 || !s2)
304 return 0;
305
306 #if RAPTOR_DEBUG > 2
307 fprintf(stderr, "(rdfdiff_statement_equals) Comparing ");
308 raptor_print_statement(s1, stderr);
309 fprintf(stderr, " to ");
310 raptor_print_statement(s2, stderr);
311 #endif
312
313 if(s1->subject_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL &&
314 s2->subject_type == RAPTOR_IDENTIFIER_TYPE_RESOURCE) {
315
316 /* check for ordinal/resource equivalence */
317 if(!rdfdiff_ordinal_equals_resource(world,
318 *(int *)s1->subject,
319 (raptor_uri *)s2->subject)) {
320 rv=0;
321 goto done;
322 }
323
324 } else if(s1->subject_type == RAPTOR_IDENTIFIER_TYPE_RESOURCE &&
325 s2->subject_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
326
327 /* check for ordinal/resource equivalence */
328 if(!rdfdiff_ordinal_equals_resource(world,
329 *(int *)s2->subject,
330 (raptor_uri *)s1->subject)) {
331 rv=0;
332 goto done;
333 }
334
335 } else {
336 /* normal comparison */
337 if(s1->subject_type != s2->subject_type) {
338 rv=0;
339 goto done;
340 }
341
342 if(s1->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
343 /* Here for completeness. Anonymous nodes are taken care of
344 * elsewhere */
345 /*if(strcmp((const char *)s1->subject, (const char *)s2->subject) != 0)
346 return 0;*/
347 } else {
348 if(!raptor_uri_equals_v2(world,
349 (raptor_uri *)s1->subject,
350 (raptor_uri *)s2->subject)) {
351 rv=0;
352 goto done;
353 }
354 }
355 }
356
357 if(s1->predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL &&
358 s2->predicate_type == RAPTOR_IDENTIFIER_TYPE_PREDICATE) {
359
360 /* check for ordinal/resource equivalence */
361 if(!rdfdiff_ordinal_equals_resource(world,
362 *(int *)s1->predicate,
363 (raptor_uri *)s2->predicate)) {
364 rv=0;
365 goto done;
366 }
367
368 } else if(s1->predicate_type == RAPTOR_IDENTIFIER_TYPE_PREDICATE &&
369 s2->predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
370
371 /* check for ordinal/resource equivalence */
372 if(!rdfdiff_ordinal_equals_resource(world,
373 *(int *)s2->predicate,
374 (raptor_uri *)s1->predicate)) {
375 rv=0;
376 goto done;
377 }
378
379 } else {
380
381 if(s1->predicate_type != s2->predicate_type) {
382 rv=0;
383 goto done;
384 }
385
386 if(s1->predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
387 if(*(int *)s1->predicate != *(int *)s2->predicate) {
388 rv=0;
389 goto done;
390 }
391 } else {
392 if(!raptor_uri_equals_v2(world,
393 (raptor_uri *)s1->predicate,
394 (raptor_uri *)s2->predicate)) {
395 rv=0;
396 goto done;
397 }
398 }
399 }
400
401 if(s1->object_type != s2->object_type) {
402 rv=0;
403 goto done;
404 }
405
406 if(s1->object_type == RAPTOR_IDENTIFIER_TYPE_LITERAL ||
407 s1->object_type == RAPTOR_IDENTIFIER_TYPE_XML_LITERAL) {
408 int equal;
409
410 equal=!safe_strcmp((char *)s1->object, (char *)s2->object);
411
412 if(equal) {
413 if(s1->object_literal_language && s2->object_literal_language)
414 equal=!strcmp((char *)s1->object_literal_language,
415 (char *)s2->object_literal_language);
416 else if(s1->object_literal_language || s2->object_literal_language)
417 equal=0;
418 else
419 equal=1;
420
421 if(equal)
422 equal=raptor_uri_equals_v2(world,
423 s1->object_literal_datatype,
424 s2->object_literal_datatype);
425 }
426
427 rv=equal;
428 goto done;
429 } else if(s1->object_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
430 /* Here for completeness. Anonymous nodes are taken care of
431 * elsewhere */
432 /* if(strcmp((const char *)s1->object, (const char *)s2->object) != 0)
433 return 0; */
434 } else if(s1->object_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
435 if(*(int *)s1->object != *(int *)s2->object) {
436 rv=0;
437 goto done;
438 }
439 } else {
440 if(!raptor_uri_equals_v2(world, (raptor_uri *)s1->object, (raptor_uri *)s2->object))
441 rv=0;
442 }
443
444 rv=1;
445 done:
446
447 #if RAPTOR_DEBUG > 2
448 fprintf(stderr, " : %s\n", (rv ? "equal" : "not equal"));
449 #endif
450 return rv;
451 }
452
453
454 static int
rdfdiff_blank_equals(const rdfdiff_blank * b1,const rdfdiff_blank * b2,rdfdiff_file * b1_file,rdfdiff_file * b2_file)455 rdfdiff_blank_equals(const rdfdiff_blank *b1, const rdfdiff_blank *b2,
456 rdfdiff_file *b1_file, rdfdiff_file *b2_file)
457 {
458 /* first compare "owners". Owners are subject/predicate or arcs
459 * in. */
460 int equal = 0;
461
462 if(b1->owner == NULL && b2->owner == NULL) {
463 /* Both are "top-level" anonymous objects. I.E. Neither is the
464 * object of a statement. Fall through and compare based on their
465 * contents. */
466 equal = 1;
467 } else if(b1->owner == NULL || b2->owner == NULL) {
468 equal = 0;
469 } else if(b1->owner->subject_type != RAPTOR_IDENTIFIER_TYPE_ANONYMOUS &&
470 b2->owner->subject_type != RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
471 /* Neither are anonymous. Normal comparison. This will return
472 * false if both the subject and the predicates don't match. We
473 * know the objects are blank nodes. */
474 equal = rdfdiff_statement_equals(b1->world, b1->owner, b2->owner);
475
476 } else if(b1->owner->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS &&
477 b2->owner->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
478 rdfdiff_blank *p1;
479 rdfdiff_blank *p2;
480
481 /* Both are anonymous. Need further testing. Check that the
482 * containing anononymous nodes are eaual. */
483 #if 0
484 fprintf(stderr, "b1->owner: ");
485 raptor_print_statement(b1->owner, stderr);
486 fprintf(stderr, "\n");
487
488 fprintf(stderr, "b2->owner: ");
489 raptor_print_statement(b2->owner, stderr);
490 fprintf(stderr, "\n");
491 #endif
492 p1 = rdfdiff_find_blank(b1_file->first_blank, (char *)b1->owner->subject);
493 p2 = rdfdiff_find_blank(b2_file->first_blank, (char *)b2->owner->subject);
494 equal = rdfdiff_blank_equals(p1, p2, b1_file, b2_file);
495 } else {
496 equal = 0;
497 }
498
499 /* Now compare the contents. This accounts for the case where a
500 * subject has several properties (of the same predicate value) with
501 * different blank nodes as values. */
502 if(equal) {
503 rdfdiff_link *s1 = b1->first;
504 while (s1) {
505
506 rdfdiff_link *s2 = b2->first;
507 while (s2) {
508
509 if(rdfdiff_statement_equals(b1->world, s1->statement, s2->statement))
510 break;
511
512 s2 = s2->next;
513
514 }
515
516 if(s2 == 0) {
517 equal = 0;
518 break;
519 }
520
521 s1 = s1->next;
522
523 }
524
525 }
526
527 return equal;
528 }
529
530
531 static void
rdfdiff_error_handler(void * data,raptor_locator * locator,const char * message)532 rdfdiff_error_handler(void *data, raptor_locator *locator,
533 const char *message)
534 {
535 rdfdiff_file* file = (rdfdiff_file*)data;
536
537 if(!ignore_errors) {
538 fprintf(stderr, "%s: Error - ", program);
539 raptor_print_locator_v2(file->world, stderr, locator);
540 fprintf(stderr, " - %s\n", message);
541
542 raptor_parse_abort(file->parser);
543 }
544
545 file->error_count++;
546
547 }
548
549
550 static void
rdfdiff_warning_handler(void * data,raptor_locator * locator,const char * message)551 rdfdiff_warning_handler(void *data, raptor_locator *locator,
552 const char *message)
553 {
554 rdfdiff_file* file = (rdfdiff_file*)data;
555
556 if(!ignore_warnings) {
557 fprintf(stderr, "%s: Warning - ", program);
558 raptor_print_locator_v2(file->world, stderr, locator);
559 fprintf(stderr, " - %s\n", message);
560 }
561
562 file->warning_count++;
563
564 }
565
566
567 static rdfdiff_blank *
rdfdiff_find_blank(rdfdiff_blank * first,char * blank_id)568 rdfdiff_find_blank(rdfdiff_blank *first, char *blank_id)
569 {
570 rdfdiff_blank *rv_blank = 0;
571 rdfdiff_blank *cur = first;
572
573 while (cur) {
574
575 if(strcmp(cur->blank_id, blank_id) == 0) {
576 rv_blank = cur;
577 break;
578 }
579
580 cur = cur->next;
581
582 }
583
584 return rv_blank;
585
586 }
587
588
589 static rdfdiff_blank *
rdfdiff_lookup_blank(rdfdiff_file * file,char * blank_id)590 rdfdiff_lookup_blank(rdfdiff_file* file, char *blank_id)
591 {
592 rdfdiff_blank *rv_blank = rdfdiff_find_blank(file->first_blank, blank_id);
593
594 if(rv_blank == NULL) {
595 rv_blank = rdfdiff_new_blank(file->world, blank_id);
596 if(rv_blank) {
597
598 if(!file->first_blank) {
599 file->first_blank = rv_blank;
600 file->last_blank = rv_blank;
601 } else {
602 file->last_blank->next = rv_blank;
603 file->last_blank = rv_blank;
604 }
605 }
606 }
607
608 return rv_blank;
609
610 }
611
612
613 static int
rdfdiff_add_blank_statement(rdfdiff_file * file,const raptor_statement * statement)614 rdfdiff_add_blank_statement(rdfdiff_file* file,
615 const raptor_statement *statement)
616 {
617 rdfdiff_blank *blank;
618 rdfdiff_link *dlink;
619
620 blank = rdfdiff_lookup_blank(file, (char *)statement->subject);
621 if(!blank)
622 goto failed;
623
624 dlink = (rdfdiff_link *)RAPTOR_MALLOC(rdfdiff_link, sizeof(rdfdiff_link));
625 if(!dlink)
626 goto failed;
627
628 dlink->statement = raptor_statement_copy(file->world, statement);
629 if(!dlink->statement) {
630 RAPTOR_FREE(rdfdiff_link, dlink);
631 goto failed;
632 }
633
634 dlink->next = NULL;
635 if(!blank->first) {
636 blank->first = dlink;
637 blank->last = dlink;
638 } else {
639 blank->last->next = dlink;
640 blank->last = dlink;
641 }
642
643 return 0;
644
645 failed:
646 fprintf(stderr, "%s: Internal Error\n", program);
647 return 1;
648 }
649
650
651 static int
rdfdiff_add_blank_statement_owner(rdfdiff_file * file,const raptor_statement * statement)652 rdfdiff_add_blank_statement_owner(rdfdiff_file* file,
653 const raptor_statement *statement)
654 {
655 rdfdiff_blank *blank;
656
657 blank = rdfdiff_lookup_blank(file, (char *)statement->object);
658 if(!blank)
659 goto failed;
660
661 blank->owner = raptor_statement_copy(file->world, statement);
662 if(!blank->owner)
663 goto failed;
664
665 return 0;
666
667 failed:
668 fprintf(stderr, "%s: Internal Error\n", program);
669 return 1;
670 }
671
672
673 static int
rdfdiff_add_statement(rdfdiff_file * file,const raptor_statement * statement)674 rdfdiff_add_statement(rdfdiff_file* file, const raptor_statement *statement)
675 {
676 int rv = 0;
677
678 rdfdiff_link *dlink = (rdfdiff_link *)RAPTOR_MALLOC(rdfdiff_link, sizeof(rdfdiff_link));
679
680 if(dlink) {
681
682 dlink->statement = raptor_statement_copy(file->world, statement);
683
684 if(dlink->statement) {
685
686 dlink->next = NULL;
687
688 if(!file->first) {
689 file->first = dlink;
690 file->last = dlink;
691 } else {
692 file->last->next = dlink;
693 file->last = dlink;
694 }
695
696 } else {
697 RAPTOR_FREE(rdfdiff_link, dlink);
698 rv = 1;
699 }
700
701 } else {
702 rv = 1;
703 }
704
705 if(rv != 0)
706 fprintf(stderr, "%s: Internal Error\n", program);
707
708 return rv;
709
710 }
711
712
713 static rdfdiff_link*
rdfdiff_statement_find(rdfdiff_file * file,const raptor_statement * statement,rdfdiff_link ** prev_p)714 rdfdiff_statement_find(rdfdiff_file* file, const raptor_statement *statement,
715 rdfdiff_link** prev_p)
716 {
717 rdfdiff_link* prev = NULL;
718 rdfdiff_link* cur = file->first;
719
720 while(cur) {
721 if(rdfdiff_statement_equals(file->world, cur->statement, statement)) {
722 if(prev_p)
723 *prev_p=prev;
724 return cur;
725 }
726 prev=cur;
727 cur=cur->next;
728 }
729
730 return NULL;
731 }
732
733
734 static int
rdfdiff_statement_exists(rdfdiff_file * file,const raptor_statement * statement)735 rdfdiff_statement_exists(rdfdiff_file* file, const raptor_statement *statement)
736 {
737 rdfdiff_link* node;
738 rdfdiff_link* prev=NULL;
739 node=rdfdiff_statement_find(file, statement, &prev);
740 return (node != NULL);
741 }
742
743
744 /*
745 * rdfdiff_collect_statements - Called when parsing "from" file to build a
746 * list of statements for comparison with those in the "to" file.
747 */
748 static void
rdfdiff_collect_statements(void * user_data,const raptor_statement * statement)749 rdfdiff_collect_statements(void *user_data, const raptor_statement *statement)
750 {
751 int rv = 0;
752 rdfdiff_file* file = (rdfdiff_file*)user_data;
753
754 if(rdfdiff_statement_exists(file, statement))
755 return;
756
757 file->statement_count++;
758
759 if(statement->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS ||
760 statement->object_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
761
762 if(statement->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS)
763 rv = rdfdiff_add_blank_statement(file, statement);
764
765 if(rv == 0 && statement->object_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS)
766 rv = rdfdiff_add_blank_statement_owner(file, statement);
767
768 } else {
769 rv = rdfdiff_add_statement(file, statement);
770 }
771
772 if(rv != 0) {
773 raptor_parse_abort(file->parser);
774 }
775
776 }
777
778
779
780 int
main(int argc,char * argv[])781 main(int argc, char *argv[])
782 {
783 raptor_world *world = NULL;
784 unsigned char *from_string=NULL;
785 unsigned char *to_string=NULL;
786 raptor_uri *from_uri=NULL;
787 raptor_uri *to_uri=NULL;
788 raptor_uri *base_uri=NULL;
789 const char *from_syntax = "rdfxml";
790 const char *to_syntax = "rdfxml";
791 int free_from_string = 0;
792 int free_to_string = 0;
793 int usage=0;
794 int help=0;
795 char *p;
796 int rv = 0;
797 rdfdiff_blank *b1;
798 rdfdiff_link *cur;
799
800 program=argv[0];
801 if((p=strrchr(program, '/')))
802 program=p+1;
803 else if((p=strrchr(program, '\\')))
804 program=p+1;
805 argv[0]=program;
806
807 world = raptor_new_world();
808 if(!world)
809 exit(1);
810 rv = raptor_world_open(world);
811 if(rv)
812 exit(1);
813
814 while (!usage && !help)
815 {
816 int c;
817 #ifdef HAVE_GETOPT_LONG
818 int option_index = 0;
819
820 c = getopt_long (argc, argv, GETOPT_STRING, long_options, &option_index);
821 #else
822 c = getopt (argc, argv, GETOPT_STRING);
823 #endif
824 if(c == -1)
825 break;
826
827 switch (c) {
828 case 0:
829 case '?': /* getopt() - unknown option */
830 usage=1;
831 break;
832
833 case 'b':
834 brief = 1;
835 break;
836
837 case 'h':
838 help=1;
839 break;
840
841 case 'f':
842 if(optarg)
843 from_syntax = optarg;
844 break;
845
846 case 't':
847 if(optarg)
848 to_syntax = optarg;
849 break;
850
851 case 'u':
852 if(optarg)
853 base_uri = raptor_new_uri_v2(world, (const unsigned char*)optarg);
854 break;
855
856 }
857
858 }
859
860 if(optind != argc-2)
861 help = 1;
862
863 if(usage) {
864 if(usage>1) {
865 fprintf(stderr, title_format_string, raptor_version_string);
866 fputs(raptor_short_copyright_string, stderr);
867 fputc('\n', stderr);
868 }
869 fprintf(stderr, "Try `%s " HELP_ARG(h, help) "' for more information.\n",
870 program);
871 rv = 1;
872 goto exit;
873 }
874
875 if(help) {
876 printf("Usage: %s [OPTIONS] <from URI> <to URI>\n", program);
877 printf(title_format_string, raptor_version_string);
878 puts(raptor_short_copyright_string);
879 puts("Find differences between two RDF files.");
880 puts("\nOPTIONS:");
881 puts(HELP_TEXT("h", "help ", "Print this help, then exit"));
882 puts(HELP_TEXT("b", "brief ", "Report only whether files differ"));
883 puts(HELP_TEXT("u BASE-URI", "base-uri BASE-URI ", "Set the base URI for the files"));
884 puts(HELP_TEXT("f FORMAT", "from-format FORMAT ", "Format of <from URI> (default is rdfxml)"));
885 puts(HELP_TEXT("t FORMAT", "to-format FORMAT ", "Format of <to URI> (default is rdfxml)"));
886 rv = 1;
887 goto exit;
888 }
889
890 from_string = (unsigned char *)argv[optind++];
891 to_string = (unsigned char *)argv[optind];
892
893 if(!access((const char *)from_string, R_OK)) {
894 char *filename = (char *)from_string;
895 from_string = raptor_uri_filename_to_uri_string(filename);
896 if(!from_string) {
897 fprintf(stderr, "%s: Failed to create URI for file %s.\n", program, filename);
898 rv = 2;
899 goto exit;
900 }
901 free_from_string = 1;
902 }
903
904 if(!access((const char *)to_string, R_OK)) {
905 char *filename = (char *)to_string;
906 to_string = raptor_uri_filename_to_uri_string(filename);
907 if(!to_string) {
908 fprintf(stderr, "%s: Failed to create URI for file %s.\n", program, filename);
909 rv = 2;
910 goto exit;
911 }
912 free_to_string = 1;
913 }
914
915 if(from_string) {
916 from_uri = raptor_new_uri_v2(world, from_string);
917 if(!from_uri) {
918 fprintf(stderr, "%s: Failed to create URI for %s\n", program, from_string);
919 rv = 2;
920 goto exit;
921 }
922 }
923
924 if(to_string) {
925 to_uri = raptor_new_uri_v2(world, to_string);
926 if(!to_uri) {
927 fprintf(stderr, "%s: Failed to create URI for %s\n", program, from_string);
928 rv = 2;
929 goto exit;
930 }
931 }
932
933 /* create and init "from" data structures */
934 from_file = rdfdiff_new_file(world, from_string, from_syntax);
935 if(!from_file) {
936 rv = 2;
937 goto exit;
938 }
939
940 /* create and init "to" data structures */
941 to_file = rdfdiff_new_file(world, to_string, to_syntax);
942 if(!to_file) {
943 rv = 2;
944 goto exit;
945 }
946
947 /* parse the files */
948 raptor_set_statement_handler(from_file->parser, from_file,
949 rdfdiff_collect_statements);
950
951 if(raptor_parse_uri(from_file->parser, from_uri, base_uri)) {
952 fprintf(stderr, "%s: Failed to parse URI %s as %s content\n", program,
953 from_string, from_syntax);
954 rv = 1;
955 goto exit;
956 } else {
957
958 /* Note intentional from_uri as base_uri */
959 raptor_set_statement_handler(to_file->parser, to_file,
960 rdfdiff_collect_statements);
961 if(raptor_parse_uri(to_file->parser, to_uri, base_uri ? base_uri: from_uri)) {
962 fprintf(stderr, "%s: Failed to parse URI %s as %s content\n", program,
963 to_string, to_syntax);
964 rv = 1;
965 goto exit;
966 }
967 }
968
969
970 /* Compare triples with no blank nodes */
971 cur = to_file->first;
972 while(cur) {
973 rdfdiff_link* node;
974 rdfdiff_link* prev;
975 node=rdfdiff_statement_find(from_file, cur->statement, &prev);
976 if(node) {
977 /* exists in from file - remove it from the list */
978 if(from_file->first == node) {
979 from_file->first = node->next;
980 } else {
981 prev->next = node->next;
982 }
983 raptor_free_statement(world, node->statement);
984 RAPTOR_FREE(rdfdiff_link, node);
985 } else {
986 if(!brief) {
987 if(emit_from_header) {
988 fprintf(stderr, "Statements in %s but not in %s\n",
989 to_file->name, from_file->name);
990 emit_from_header = 0;
991 }
992
993 fprintf(stderr, "< ");
994 raptor_print_statement_v1(world, cur->statement, stderr);
995 fprintf(stderr, "\n");
996 }
997
998 to_file->difference_count++;
999 }
1000 cur=cur->next;
1001 }
1002
1003
1004 /* Now compare the blank nodes */
1005 b1 = to_file->first_blank;
1006 while (b1) {
1007
1008 rdfdiff_blank *b2 = from_file->first_blank;
1009
1010 while (b2) {
1011
1012 if(!b2->matched && rdfdiff_blank_equals(b1, b2, to_file, from_file)) {
1013 b1->matched = 1;
1014 b2->matched = 1;
1015 break;
1016 }
1017
1018 b2 = b2->next;
1019
1020 }
1021
1022 if(b2 == 0) {
1023 if(!brief) {
1024 #if 0
1025 fprintf(stderr, "< ");
1026 raptor_print_statement(b1->owner, stderr);
1027 fprintf(stderr, "\n");
1028 #else
1029 if(emit_from_header) {
1030 fprintf(stderr, "Statements in %s but not in %s\n", to_file->name, from_file->name);
1031 emit_from_header = 0;
1032 }
1033
1034 fprintf(stderr, "< anonymous node %s\n", b1->blank_id);
1035 #endif
1036 }
1037
1038 to_file->difference_count++;
1039 }
1040
1041 b1 = b1->next;
1042
1043 }
1044
1045 if(from_file->first) {
1046 /* The entrys left in from_file have not been found in to_file. */
1047 if(!brief) {
1048
1049 if(emit_to_header) {
1050 fprintf(stderr, "Statements in %s but not in %s\n", from_file->name,
1051 to_file->name);
1052 emit_to_header = 0;
1053 }
1054
1055 cur = from_file->first;
1056 while (cur) {
1057 if(!brief) {
1058 fprintf(stderr, "> ");
1059 raptor_print_statement_v1(world, cur->statement, stderr);
1060 fprintf(stderr, "\n");
1061 }
1062
1063 cur = cur->next;
1064 from_file->difference_count++;
1065 }
1066 }
1067
1068 }
1069
1070 if(from_file->first_blank) {
1071 rdfdiff_blank *blank = from_file->first_blank;
1072 while (blank) {
1073
1074 if(!blank->matched) {
1075 if(!brief) {
1076 #if 0
1077 fprintf(stderr, "> ");
1078 raptor_print_statement(blank->owner, stderr);
1079 fprintf(stderr, "\n");
1080 #else
1081 if(emit_to_header) {
1082 fprintf(stderr, "Statements in %s but not in %s\n", from_file->name, to_file->name);
1083 emit_to_header = 0;
1084 }
1085 fprintf(stderr, "> anonymous node %s\n", blank->blank_id);
1086 #endif
1087 }
1088 from_file->difference_count++;
1089 }
1090
1091 blank = blank->next;
1092
1093 }
1094
1095 }
1096
1097 if(!(from_file->difference_count == 0 &&
1098 to_file->difference_count == 0)) {
1099
1100 if(brief)
1101 fprintf(stderr, "Files differ\n");
1102
1103 rv = 1;
1104 }
1105
1106 exit:
1107
1108 if(base_uri)
1109 raptor_free_uri_v2(world, base_uri);
1110
1111 if(from_file)
1112 rdfdiff_free_file(from_file);
1113
1114 if(to_file)
1115 rdfdiff_free_file(to_file);
1116
1117 if(free_from_string)
1118 raptor_free_memory(from_string);
1119
1120 if(free_to_string)
1121 raptor_free_memory(to_string);
1122
1123 if(from_uri)
1124 raptor_free_uri_v2(world, from_uri);
1125
1126 if(to_uri)
1127 raptor_free_uri_v2(world, to_uri);
1128
1129 raptor_free_world(world);
1130
1131 return rv;
1132
1133 }
1134
1135