1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * rasqal_results_compare.c - Rasqal Class for comparing Query Results
4  *
5  * Copyright (C) 2014, David Beckett http://www.dajobe.org/
6  *
7  * This package is Free Software and part of Redland http://librdf.org/
8  *
9  * It is licensed under the following three licenses as alternatives:
10  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
11  *   2. GNU General Public License (GPL) V2 or any newer version
12  *   3. Apache License, V2.0 or any newer version
13  *
14  * You may not use this file except in compliance with at least one of
15  * the above three licenses.
16  *
17  * See LICENSE.html or LICENSE.txt at the top of this package for the
18  * complete terms and further detail along with the license texts for
19  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
20  *
21  *
22  */
23 
24 #ifdef HAVE_CONFIG_H
25 #include <rasqal_config.h>
26 #endif
27 
28 #ifdef WIN32
29 #include <win32_rasqal_config.h>
30 #endif
31 
32 #include <stdio.h>
33 #ifdef HAVE_STDLIB_H
34 #include <stdlib.h>
35 #endif
36 #include <string.h>
37 
38 #include "rasqal.h"
39 #include "rasqal_internal.h"
40 
41 
42 #ifndef STANDALONE
43 
44 /**
45  * rasqal_results_compare:
46  * @vt: variables table
47  * @defined_in_map: of size @variables_count
48  * @first_count: number of variables in first query result
49  * @second_count: number of variables in second query result
50  * @variables_count: number of variables in @vt and @defined_in_map
51  * @variables_in_both_count: number of shared variables in both query results
52  *
53  * Lookup data constructed for comparing two query results to enable
54  * quick mapping between values.
55  *
56  */
57 struct rasqal_results_compare_s {
58   rasqal_world* world;
59 
60   rasqal_query_results* first_qr;
61   const char* first_qr_label;
62   rasqal_query_results* second_qr;
63   const char* second_qr_label;
64 
65   void* log_user_data;
66   raptor_log_handler log_handler;
67   raptor_log_message message;
68 
69   rasqal_variables_table* vt;
70   int* defined_in_map;
71   unsigned int first_count;
72   unsigned int second_count;
73   unsigned int variables_count;
74   unsigned int variables_in_both_count;
75 };
76 
77 
78 
79 rasqal_results_compare*
rasqal_new_results_compare(rasqal_world * world,rasqal_query_results * first_qr,const char * first_qr_label,rasqal_query_results * second_qr,const char * second_qr_label)80 rasqal_new_results_compare(rasqal_world* world,
81                            rasqal_query_results *first_qr, const char* first_qr_label,
82                            rasqal_query_results *second_qr, const char* second_qr_label)
83 {
84   rasqal_results_compare* rrc = NULL;
85   rasqal_variables_table* first_vt;
86   rasqal_variables_table* second_vt;
87   unsigned int i;
88   unsigned int size;
89 
90   first_vt = rasqal_query_results_get_variables_table(first_qr);
91   second_vt = rasqal_query_results_get_variables_table(second_qr);
92 
93   rrc = RASQAL_CALLOC(rasqal_results_compare*, 1, sizeof(*rrc));
94   if(!rrc)
95     return NULL;
96 
97   rrc->world = world;
98 
99   rrc->first_qr = first_qr;
100   rrc->first_qr_label = first_qr_label;
101   rrc->second_qr = second_qr;
102   rrc->second_qr_label = second_qr_label;
103 
104   rrc->message.code = -1;
105   rrc->message.domain = RAPTOR_DOMAIN_NONE;
106   rrc->message.level = RAPTOR_LOG_LEVEL_NONE;
107   rrc->message.locator = NULL;
108   rrc->message.text = NULL;
109 
110   rrc->first_count = RASQAL_GOOD_CAST(unsigned int, rasqal_variables_table_get_total_variables_count(first_vt));
111   rrc->second_count = RASQAL_GOOD_CAST(unsigned int, rasqal_variables_table_get_total_variables_count(second_vt));
112   rrc->variables_count = 0;
113 
114   size = (rrc->first_count + rrc->second_count) << 1;
115   rrc->defined_in_map = RASQAL_CALLOC(int*, size, sizeof(int));
116   if(!rrc->defined_in_map) {
117     RASQAL_FREE(rasqal_results_compare, rrc);
118     return NULL;
119   }
120   for(i = 0; i < size; i++)
121     rrc->defined_in_map[i] = -1;
122 
123   rrc->vt = rasqal_new_variables_table(world);
124   if(!rrc->vt) {
125     RASQAL_FREE(int*, rrc->defined_in_map);
126     RASQAL_FREE(rasqal_results_compare, rrc);
127     return NULL;
128   }
129 
130   first_vt = rasqal_query_results_get_variables_table(first_qr);
131   for(i = 0; i < rrc->first_count; i++) {
132     rasqal_variable *v;
133     rasqal_variable *v2;
134 
135     v = rasqal_variables_table_get(first_vt, RASQAL_GOOD_CAST(int, i));
136     v2 = rasqal_variables_table_add2(rrc->vt, v->type, v->name, 0, NULL);
137     rrc->defined_in_map[(v2->offset)<<1] = RASQAL_GOOD_CAST(int, i);
138     rasqal_free_variable(v2);
139   }
140 
141   second_vt = rasqal_query_results_get_variables_table(second_qr);
142   for(i = 0; i < rrc->second_count; i++) {
143     rasqal_variable *v;
144     rasqal_variable *v2;
145     int free_v2 = 0;
146 
147     v = rasqal_variables_table_get(second_vt, RASQAL_GOOD_CAST(int, i));
148     v2 = rasqal_variables_table_get_by_name(rrc->vt, v->type, v->name);
149     if(!v2) {
150       free_v2 = 1;
151       v2 = rasqal_variables_table_add2(rrc->vt, v->type, v->name, 0, NULL);
152     }
153     rrc->defined_in_map[1 + ((v2->offset)<<1)] = RASQAL_GOOD_CAST(int, i);
154     if(free_v2)
155       rasqal_free_variable(v2);
156   }
157 
158   rrc->variables_count = RASQAL_GOOD_CAST(unsigned int, rasqal_variables_table_get_total_variables_count(rrc->vt));
159 
160   for(i = 0; i < rrc->variables_count; i++) {
161     if(rrc->defined_in_map[(i<<1)] >= 0 && rrc->defined_in_map[1 + (i<<1)] >= 0)
162       rrc->variables_in_both_count++;
163   }
164 
165   return rrc;
166 }
167 
168 
169 void
rasqal_free_results_compare(rasqal_results_compare * rrc)170 rasqal_free_results_compare(rasqal_results_compare* rrc)
171 {
172   if(!rrc)
173     return;
174 
175   if(rrc->defined_in_map)
176     RASQAL_FREE(rasqal_variable**, rrc->defined_in_map);
177   if(rrc->vt)
178     rasqal_free_variables_table(rrc->vt);
179   RASQAL_FREE(rasqal_results_compare, rrc);
180 }
181 
182 
183 /**
184  * rasqal_results_compare_set_log_handler:
185  * @rrc: results compare object
186  * @log_user_data: log handler user data
187  * @log_handler: log handler
188  *
189  * Set query results comparer log handler
190  *
191  */
192 void
rasqal_results_compare_set_log_handler(rasqal_results_compare * rrc,void * log_user_data,raptor_log_handler log_handler)193 rasqal_results_compare_set_log_handler(rasqal_results_compare* rrc,
194                                        void* log_user_data,
195                                        raptor_log_handler log_handler)
196 {
197   rrc->log_user_data = log_user_data;
198   rrc->log_handler = log_handler;
199 }
200 
201 
202 /**
203  * rasqal_results_compare_variables_equal:
204  * @rrc: results compare object
205  *
206  * Test if two results have the same sets of variables
207  *
208  * Return value: non-0 if the results have the same sets of variables
209  */
210 int
rasqal_results_compare_variables_equal(rasqal_results_compare * rrc)211 rasqal_results_compare_variables_equal(rasqal_results_compare* rrc)
212 {
213   unsigned int i;
214   unsigned int count = rrc->variables_count;
215 
216   /* If no variables in common, not equal */
217   if(!rrc->variables_in_both_count)
218     return 0;
219 
220   /* If variables count are different, not equal */
221   if(rrc->first_count != rrc->second_count)
222     return 0;
223 
224   for(i = 0; i < count; i++) {
225     /* If any variable is not in both, not equal */
226     if(rrc->defined_in_map[i<<1] < 0 ||
227        rrc->defined_in_map[1 + (i<<1)] < 0 )
228       return 0;
229   }
230 
231   return 1;
232 }
233 
234 
235 /**
236  * rasqal_results_compare_get_variable_by_offset:
237  * @map: results comparible
238  * @idx: variable index
239  *
240  * Get variable by index
241  *
242  * Return value: pointer to shared #rasqal_variable or NULL if out of range
243  */
244 rasqal_variable*
rasqal_results_compare_get_variable_by_offset(rasqal_results_compare * rrc,int idx)245 rasqal_results_compare_get_variable_by_offset(rasqal_results_compare* rrc, int idx)
246 {
247   return rasqal_variables_table_get(rrc->vt, idx);
248 }
249 
250 
251 /**
252  * rasqal_results_compare_get_variable_offset_for_result:
253  * @map: results comparible
254  * @var_idx: variable index
255  * @qr_index: results index 0 (first) or 1 (second)
256  *
257  * Get variable index in a query results by variable index
258  *
259  * Return value: index into query result list of variables or <0 if @var_idx or @qr_index is out of range
260  */
261 int
rasqal_results_compare_get_variable_offset_for_result(rasqal_results_compare * rrc,int var_idx,int qr_index)262 rasqal_results_compare_get_variable_offset_for_result(rasqal_results_compare* rrc,
263                                                       int var_idx, int qr_index)
264 {
265   if(qr_index < 0 || qr_index > 1)
266     return -1;
267 
268   if(!rasqal_results_compare_get_variable_by_offset(rrc, var_idx))
269     return -1;
270 
271   return rrc->defined_in_map[qr_index + (var_idx << 1)];
272 }
273 
274 
275 void
rasqal_print_results_compare(FILE * handle,rasqal_results_compare * rrc)276 rasqal_print_results_compare(FILE *handle, rasqal_results_compare* rrc)
277 {
278   unsigned int count = rrc->variables_count;
279   rasqal_variables_table* vt = rrc->vt;
280   unsigned int i;
281   char first_qr[4];
282   char second_qr[4];
283 
284   fprintf(handle,
285           "Results variable compare map: total variables: %u  shared variables: %u\n",
286           count, rrc->variables_in_both_count);
287   for(i = 0; i < count; i++) {
288     rasqal_variable *v = rasqal_variables_table_get(vt, RASQAL_GOOD_CAST(int, i));
289     int offset1 = rrc->defined_in_map[i<<1];
290     int offset2 = rrc->defined_in_map[1 + (i<<1)];
291 
292     if(offset1 < 0)
293       *first_qr = '\0';
294     else
295       sprintf(first_qr, "%2d", offset1);
296 
297     if(offset2 < 0)
298       *second_qr = '\0';
299     else
300       sprintf(second_qr, "%2d", offset2);
301 
302     fprintf(handle,
303             "  Variable %10s   offsets first: %-3s  second: %-3s  %s\n",
304             v->name, first_qr, second_qr,
305             ((offset1 >= 0 && offset2 >= 0) ? "SHARED" : ""));
306   }
307 }
308 
309 
310 /**
311  * rasqal_results_compare_compare:
312  * @cqr: query results object
313  *
314  * Run a query results comparison
315  *
316  * Return value: non-0 if equal
317  */
318 int
rasqal_results_compare_compare(rasqal_results_compare * rrc)319 rasqal_results_compare_compare(rasqal_results_compare* rrc)
320 {
321   int differences = 0;
322   int rowi;
323   int size1;
324   int size2;
325   int row_differences_count = 0;
326 
327   size1 = rasqal_query_results_get_bindings_count(rrc->first_qr);
328   size2 = rasqal_query_results_get_bindings_count(rrc->second_qr);
329 
330   if(size1 != size2) {
331     rrc->message.level = RAPTOR_LOG_LEVEL_ERROR;
332     rrc->message.text = "Results have different numbers of bindings";
333     if(rrc->log_handler)
334       rrc->log_handler(rrc->log_user_data, &rrc->message);
335 
336     differences++;
337     goto done;
338   }
339 
340   if(size1 > 0) {
341     /* If there are variables; check they match */
342     if(!rrc->variables_in_both_count) {
343       rrc->message.level = RAPTOR_LOG_LEVEL_ERROR;
344       rrc->message.text = "Results have no common variables";
345       if(rrc->log_handler)
346         rrc->log_handler(rrc->log_user_data, &rrc->message);
347 
348       differences++;
349       goto done;
350     }
351 
352     if(!rasqal_results_compare_variables_equal(rrc)) {
353       rrc->message.level = RAPTOR_LOG_LEVEL_ERROR;
354       rrc->message.text = "Results have different sets of variables";
355       if(rrc->log_handler)
356         rrc->log_handler(rrc->log_user_data, &rrc->message);
357 
358       differences++;
359       goto done;
360     }
361   }
362 
363   /* set results to be stored? */
364 
365   /* sort rows by something ?  As long as the sort is the same it
366    * probably does not matter what the method is. */
367 
368   /* what to do about blank nodes? */
369 
370   /* for each row */
371   for(rowi = 0; 1; rowi++) {
372     unsigned int bindingi;
373     rasqal_row* row1 = rasqal_query_results_get_row_by_offset(rrc->first_qr, rowi);
374     rasqal_row* row2 = rasqal_query_results_get_row_by_offset(rrc->second_qr, rowi);
375     int this_row_different = 0;
376 
377     if(!row1 && !row2)
378       break;
379 
380     /* for each variable (already know they are the same set) */
381     for(bindingi = 0; bindingi < rrc->variables_count; bindingi++) {
382       rasqal_variable* v;
383       const unsigned char* name;
384       int ix1;
385       int ix2;
386       rasqal_literal *value1;
387       rasqal_literal *value2;
388       int error = 0;
389 
390       v = rasqal_results_compare_get_variable_by_offset(rrc, RASQAL_GOOD_CAST(int, bindingi));
391       name = v->name;
392 
393       ix1 = rasqal_results_compare_get_variable_offset_for_result(rrc, RASQAL_GOOD_CAST(int, bindingi), 0);
394       ix2 = rasqal_results_compare_get_variable_offset_for_result(rrc, RASQAL_GOOD_CAST(int, bindingi), 1);
395 
396       value1 = rasqal_query_results_get_binding_value(rrc->first_qr, ix1);
397       value2 = rasqal_query_results_get_binding_value(rrc->second_qr, ix2);
398 
399       /* Blank nodes always match each other */
400       if(value1 && value1->type ==  RASQAL_LITERAL_BLANK &&
401          value2 && value2->type ==  RASQAL_LITERAL_BLANK)
402         continue;
403 
404       /* should have compare as native flag?
405        * RASQAL_COMPARE_XQUERY doesn't compare all values
406        */
407       if(!rasqal_literal_equals_flags(value1, value2, RASQAL_COMPARE_XQUERY,
408                                       &error)) {
409         /* if different report it */
410         raptor_world* raptor_world_ptr;
411         void *string;
412         size_t length;
413         raptor_iostream* string_iostr;
414 
415         raptor_world_ptr = rasqal_world_get_raptor(rrc->world);
416 
417         string_iostr = raptor_new_iostream_to_string(raptor_world_ptr,
418                                                      &string, &length,
419                                                      (raptor_data_malloc_handler)malloc);
420 
421         raptor_iostream_counted_string_write("Difference in row ", 18,
422                                              string_iostr);
423         raptor_iostream_decimal_write(rowi + 1,
424                                       string_iostr);
425         raptor_iostream_counted_string_write(" binding '", 10,
426                                              string_iostr);
427         raptor_iostream_string_write(name,
428                                      string_iostr);
429         raptor_iostream_counted_string_write("' ", 2,
430                                              string_iostr);
431         raptor_iostream_string_write(rrc->first_qr_label, string_iostr);
432         raptor_iostream_counted_string_write(" value ", 7,
433                                              string_iostr);
434         rasqal_literal_write(value1,
435                              string_iostr);
436         raptor_iostream_write_byte(' ',
437                                    string_iostr);
438         raptor_iostream_string_write(rrc->second_qr_label,
439                                      string_iostr);
440         raptor_iostream_counted_string_write(" value ", 7,
441                                              string_iostr);
442         rasqal_literal_write(value2,
443                              string_iostr);
444         raptor_iostream_write_byte(' ',
445                                    string_iostr);
446 
447         /* this allocates and copies result into 'string' */
448         raptor_free_iostream(string_iostr);
449 
450         rrc->message.level = RAPTOR_LOG_LEVEL_ERROR;
451         rrc->message.text = (const char*)string;
452         if(rrc->log_handler)
453           rrc->log_handler(rrc->log_user_data, &rrc->message);
454 
455         free(string);
456 
457         differences++;
458         this_row_different = 1;
459       }
460     } /* end for each var */
461 
462     if(row1)
463       rasqal_free_row(row1);
464     if(row2)
465       rasqal_free_row(row2);
466 
467     if(this_row_different)
468       row_differences_count++;
469 
470     rasqal_query_results_next(rrc->first_qr);
471     rasqal_query_results_next(rrc->second_qr);
472   } /* end for each row */
473 
474   if(row_differences_count) {
475     rrc->message.level = RAPTOR_LOG_LEVEL_ERROR;
476     rrc->message.text = "Results have different values";
477     if(rrc->log_handler)
478       rrc->log_handler(rrc->log_user_data, &rrc->message);
479   }
480 
481   done:
482   return (differences == 0);
483 }
484 
485 
486 #endif /* not STANDALONE */
487 
488 
489 
490 #ifdef STANDALONE
491 
/* some more prototypes */
int main(int argc, char *argv[]);

/* number of entries in expected_data */
#define NTESTS 2

/*
 * Test cases.  Each query result string is a tab-separated line of
 * variable names followed by a tab-separated line of quoted values.
 * main() in this file reads the two strings and expected_equality.
 */
const struct {
  /* serialized first query results */
  const char* first_qr_string;
  /* serialized second query results */
  const char* second_qr_string;
  /* number of variables in the test data */
  int expected_vars_count;
  /* number of rows in the test data */
  int expected_rows_count;
  /* expected result of the variables-equal test */
  int expected_equality;
} expected_data[NTESTS] = {
  /* identical variables in identical order */
  {
    "a\tb\tc\td\te\tf\n"
    "\"a\"\t\"b\"\t\"c\"\t\"d\"\t\"e\"\t\"f\"\n",

    "a\tb\tc\td\te\tf\n"
    "\"a\"\t\"b\"\t\"c\"\t\"d\"\t\"e\"\t\"f\"\n",

    6, 1, 1
  },
  /* identical variables in a different order */
  {
    "a\tb\tc\td\te\tf\n"
    "\"a\"\t\"b\"\t\"c\"\t\"d\"\t\"e\"\t\"f\"\n",

    "d\tf\tc\ta\te\tb\n"
    "\"d\"\t\"f\"\t\"c\"\t\"a\"\t\"e\"\t\"b\"\n",

    6, 1, 1
  }
};
515 
516 
#if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
/*
 * Debug helper: print every remaining row of @results to @output as
 * "row: [name=value, ...]".  Advances @results until it is finished.
 */
static void
print_bindings_results_simple(rasqal_query_results *results, FILE* output)
{
  for(; !rasqal_query_results_finished(results);
      rasqal_query_results_next(results)) {
    int col;

    fputs("row: [", output);
    for(col = 0; col < rasqal_query_results_get_bindings_count(results); col++) {
      const unsigned char *binding_name;
      rasqal_literal *binding_value;

      binding_name = rasqal_query_results_get_binding_name(results, col);
      binding_value = rasqal_query_results_get_binding_value(results, col);

      /* separator before every binding except the first */
      if(col != 0)
        fputs(", ", output);

      fprintf(output, "%s=", binding_name);
      rasqal_literal_print(binding_value, output);
    }
    fputs("]\n", output);
  }
}
#endif
544 
545 
546 int
main(int argc,char * argv[])547 main(int argc, char *argv[])
548 {
549   const char *program = rasqal_basename(argv[0]);
550   rasqal_world* world = NULL;
551   raptor_world* raptor_world_ptr;
552   int failures = 0;
553   int i;
554   rasqal_query_results_type type = RASQAL_QUERY_RESULTS_BINDINGS;
555 
556   world = rasqal_new_world(); rasqal_world_open(world);
557 
558   raptor_world_ptr = rasqal_world_get_raptor(world);
559 
560   for(i = 0; i < NTESTS; i++) {
561     raptor_uri* base_uri = raptor_new_uri(raptor_world_ptr,
562                                           (const unsigned char*)"http://example.org/");
563     rasqal_query_results *first_qr;
564     rasqal_query_results *second_qr = NULL;
565     int expected_equality = expected_data[i].expected_equality;
566     rasqal_results_compare* rrc;
567     int equal;
568 
569     first_qr = rasqal_new_query_results_from_string(world,
570                                                     type,
571                                                     base_uri,
572                                                     expected_data[i].first_qr_string,
573                                                     0);
574 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
575     RASQAL_DEBUG1("First query result from string");
576     print_bindings_results_simple(first_qr, stderr);
577     rasqal_query_results_rewind(first_qr);
578 #endif
579 
580     second_qr = rasqal_new_query_results_from_string(world,
581                                                      type,
582                                                      base_uri,
583                                                      expected_data[i].second_qr_string,
584                                                      0);
585 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
586     RASQAL_DEBUG1("Second query result from string");
587     print_bindings_results_simple(second_qr, stderr);
588     rasqal_query_results_rewind(second_qr);
589 #endif
590 
591     raptor_free_uri(base_uri);
592 
593     rrc = rasqal_new_results_compare(world, first_qr, "first", second_qr, "second");
594     if(!rrc) {
595       fprintf(stderr, "%s: failed to create results compatible\n", program);
596       failures++;
597     } else {
598       rasqal_print_results_compare(stderr, rrc);
599 
600       equal = rasqal_results_compare_variables_equal(rrc);
601       RASQAL_DEBUG4("%s: equal results test %d returned %d\n", program, i, equal);
602       if(equal != expected_equality) {
603         fprintf(stderr,
604                 "%s: FAILED equal results test %d returned %d  expected %d\n",
605                 program, i, equal, expected_equality);
606         failures++;
607       }
608     }
609 
610     if(first_qr)
611       rasqal_free_query_results(first_qr);
612     if(second_qr)
613       rasqal_free_query_results(second_qr);
614     if(rrc)
615       rasqal_free_results_compare(rrc);
616   }
617 
618   if(world)
619     rasqal_free_world(world);
620 
621   return failures;
622 }
623 
624 #endif /* STANDALONE */
625