1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * rasqal_rowsource_union.c - Rasqal union rowsource class
4  *
5  * Copyright (C) 2008-2009, David Beckett http://www.dajobe.org/
6  *
7  * This package is Free Software and part of Redland http://librdf.org/
8  *
9  * It is licensed under the following three licenses as alternatives:
10  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
11  *   2. GNU General Public License (GPL) V2 or any newer version
12  *   3. Apache License, V2.0 or any newer version
13  *
14  * You may not use this file except in compliance with at least one of
15  * the above three licenses.
16  *
17  * See LICENSE.html or LICENSE.txt at the top of this package for the
18  * complete terms and further detail along with the license texts for
19  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
20  *
21  */
22 
23 
24 #ifdef HAVE_CONFIG_H
25 #include <rasqal_config.h>
26 #endif
27 
28 #ifdef WIN32
29 #include <win32_rasqal_config.h>
30 #endif
31 
32 #include <stdio.h>
33 #include <string.h>
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 
38 #include <raptor.h>
39 
40 #include "rasqal.h"
41 #include "rasqal_internal.h"
42 
43 
44 #define DEBUG_FH stderr
45 
46 #ifndef STANDALONE
47 
/*
 * Private state of one UNION rowsource.
 *
 * Allocated (zero-filled) by rasqal_new_union_rowsource() and released,
 * together with everything it points at, by the finish handler.
 */
typedef struct
{
  /* first input: its rows are returned before any row from @right */
  rasqal_rowsource* left;

  /* second input: read only after @left has returned no more rows */
  rasqal_rowsource* right;

  /* array of size (number of variables in @right) with this row offset value:
   * right_map[i] is the column of the union row that receives column i of a
   * row read from @right.  Allocated by the ensure_variables handler.
   */
  int* right_map;

  /* array of size (number of variables in @right) holding right row temporarily
   * while its values are moved to their union columns
   */
  rasqal_literal** right_tmp_values;

  /* 0 = reading from left rs, 1 = reading from right rs, 2 = finished */
  int state;

  /* non-zero after a read error; further reads return NULL until reset */
  int failed;

  /* row offset for read_row(): stored in row->offset of each row returned
   * and then incremented
   */
  int offset;
} rasqal_union_rowsource_context;
68 
69 
70 static int
rasqal_union_rowsource_init(rasqal_rowsource * rowsource,void * user_data)71 rasqal_union_rowsource_init(rasqal_rowsource* rowsource, void *user_data)
72 {
73   rasqal_union_rowsource_context* con;
74 
75   con = (rasqal_union_rowsource_context*)user_data;
76   con->state = 0;
77 
78   con->failed = 0;
79 
80   rasqal_rowsource_set_requirements(con->left, RASQAL_ROWSOURCE_REQUIRE_RESET);
81   rasqal_rowsource_set_requirements(con->right, RASQAL_ROWSOURCE_REQUIRE_RESET);
82 
83   return 0;
84 }
85 
86 
87 static int
rasqal_union_rowsource_finish(rasqal_rowsource * rowsource,void * user_data)88 rasqal_union_rowsource_finish(rasqal_rowsource* rowsource, void *user_data)
89 {
90   rasqal_union_rowsource_context* con;
91   con = (rasqal_union_rowsource_context*)user_data;
92   if(con->left)
93     rasqal_free_rowsource(con->left);
94 
95   if(con->right)
96     rasqal_free_rowsource(con->right);
97 
98   if(con->right_map)
99     RASQAL_FREE(int, con->right_map);
100 
101   if(con->right_tmp_values)
102     RASQAL_FREE(ptrarray, con->right_tmp_values);
103 
104   RASQAL_FREE(rasqal_union_rowsource_context, con);
105 
106   return 0;
107 }
108 
109 
110 static int
rasqal_union_rowsource_ensure_variables(rasqal_rowsource * rowsource,void * user_data)111 rasqal_union_rowsource_ensure_variables(rasqal_rowsource* rowsource,
112                                         void *user_data)
113 {
114   rasqal_union_rowsource_context* con;
115   int map_size;
116   int i;
117 
118   con = (rasqal_union_rowsource_context*)user_data;
119 
120   if(rasqal_rowsource_ensure_variables(con->left))
121     return 1;
122 
123   if(rasqal_rowsource_ensure_variables(con->right))
124     return 1;
125 
126   map_size = rasqal_rowsource_get_size(con->right);
127   con->right_map = RASQAL_MALLOC(int*, RASQAL_GOOD_CAST(size_t,
128                                                         sizeof(int) * RASQAL_GOOD_CAST(size_t, map_size)));
129   if(!con->right_map)
130     return 1;
131 
132   con->right_tmp_values = RASQAL_MALLOC(rasqal_literal**,
133                                         sizeof(rasqal_literal*) * RASQAL_GOOD_CAST(size_t, map_size));
134   if(!con->right_tmp_values)
135     return 1;
136 
137   rowsource->size = 0;
138 
139   /* copy in variables from left rowsource */
140   if(rasqal_rowsource_copy_variables(rowsource, con->left))
141     return 1;
142 
143   /* add any new variables not already seen from right rowsource */
144   for(i = 0; i < map_size; i++) {
145     rasqal_variable* v;
146     int offset;
147 
148     v = rasqal_rowsource_get_variable_by_offset(con->right, i);
149     if(!v)
150       break;
151     offset = rasqal_rowsource_add_variable(rowsource, v);
152     if(offset < 0)
153       return 1;
154 
155     con->right_map[i] = offset;
156   }
157 
158   return 0;
159 }
160 
161 
162 static void
rasqal_union_rowsource_adjust_right_row(rasqal_rowsource * rowsource,rasqal_union_rowsource_context * con,rasqal_row * row)163 rasqal_union_rowsource_adjust_right_row(rasqal_rowsource *rowsource,
164                                         rasqal_union_rowsource_context* con,
165                                         rasqal_row *row)
166 {
167   rasqal_rowsource *right_rowsource = con->right;
168   int i;
169 
170   /* save right row values */
171   for(i = 0; i < right_rowsource->size; i++)
172     con->right_tmp_values[i] = row->values[i];
173 
174   /* NULL out other pointers */
175   for(i = 0; i < rowsource->size; i++)
176     row->values[i] = NULL;
177 
178   /* map them into correct order in result row */
179   for(i = 0; i < right_rowsource->size; i++) {
180     int offset = con->right_map[i];
181     row->values[offset] = con->right_tmp_values[i];
182   }
183 }
184 
185 
186 static rasqal_row*
rasqal_union_rowsource_read_row(rasqal_rowsource * rowsource,void * user_data)187 rasqal_union_rowsource_read_row(rasqal_rowsource* rowsource, void *user_data)
188 {
189   rasqal_union_rowsource_context* con;
190   rasqal_row* row = NULL;
191 
192   con = (rasqal_union_rowsource_context*)user_data;
193 
194   if(con->failed || con->state > 1)
195     return NULL;
196 
197   if(con->state == 0) {
198     row = rasqal_rowsource_read_row(con->left);
199 #ifdef RASQAL_DEBUG
200     RASQAL_DEBUG2("rowsource %p read left row : ", rowsource);
201     if(row)
202       rasqal_row_print(row, stderr);
203     else
204       fputs("NONE", stderr);
205     fputs("\n", stderr);
206 #endif
207 
208     if(!row)
209       con->state = 1;
210     else {
211       /* otherwise: rows from left are correct order but wrong size */
212       if(rasqal_row_expand_size(row, rowsource->size)) {
213         rasqal_free_row(row);
214         return NULL;
215       }
216     }
217   }
218   if(!row && con->state == 1) {
219     row = rasqal_rowsource_read_row(con->right);
220 #ifdef RASQAL_DEBUG
221     RASQAL_DEBUG2("rowsource %p read right row : ", rowsource);
222     if(row)
223       rasqal_row_print(row, stderr);
224     else
225       fputs("NONE", stderr);
226     fputs("\n", stderr);
227 #endif
228 
229     if(!row)
230       /* finished */
231       con->state = 2;
232     else {
233       if(rasqal_row_expand_size(row, rowsource->size)) {
234         rasqal_free_row(row);
235         return NULL;
236       }
237       /* transform row from right to match new projection */
238       rasqal_union_rowsource_adjust_right_row(rowsource, con, row);
239     }
240   }
241 
242   if(row) {
243     rasqal_row_set_rowsource(row, rowsource);
244     row->offset = con->offset++;
245   }
246 
247   return row;
248 }
249 
250 
251 static raptor_sequence*
rasqal_union_rowsource_read_all_rows(rasqal_rowsource * rowsource,void * user_data)252 rasqal_union_rowsource_read_all_rows(rasqal_rowsource* rowsource,
253                                      void *user_data)
254 {
255   rasqal_union_rowsource_context* con;
256   raptor_sequence* seq1 = NULL;
257   raptor_sequence* seq2 = NULL;
258   int left_size;
259   int right_size;
260   int i;
261 
262   con = (rasqal_union_rowsource_context*)user_data;
263 
264   if(con->failed)
265     return NULL;
266 
267   seq1 = rasqal_rowsource_read_all_rows(con->left);
268   if(!seq1) {
269     con->failed = 1;
270     return NULL;
271   }
272 
273   seq2 = rasqal_rowsource_read_all_rows(con->right);
274   if(!seq2) {
275     con->failed = 1;
276     raptor_free_sequence(seq1);
277     return NULL;
278   }
279 
280 #ifdef RASQAL_DEBUG
281   fprintf(DEBUG_FH, "left rowsource (%d vars):\n",
282           rasqal_rowsource_get_size(con->left));
283   rasqal_rowsource_print_row_sequence(con->left, seq1, DEBUG_FH);
284 
285   fprintf(DEBUG_FH, "right rowsource (%d vars):\n",
286           rasqal_rowsource_get_size(con->right));
287   rasqal_rowsource_print_row_sequence(con->right, seq2, DEBUG_FH);
288 #endif
289 
290   /* transform rows from left to match new projection */
291   left_size = raptor_sequence_size(seq1);
292   for(i = 0; i < left_size; i++) {
293     rasqal_row *row = (rasqal_row*)raptor_sequence_get_at(seq1, i);
294     /* rows from left are correct order but wrong size */
295     rasqal_row_expand_size(row, rowsource->size);
296     rasqal_row_set_rowsource(row, rowsource);
297   }
298   /* transform rows from right to match new projection */
299   right_size = raptor_sequence_size(seq2);
300   for(i = 0; i < right_size; i++) {
301     rasqal_row *row = (rasqal_row*)raptor_sequence_get_at(seq2, i);
302     /* rows from right need resizing and adjusting by offset */
303     rasqal_row_expand_size(row, rowsource->size);
304     rasqal_union_rowsource_adjust_right_row(rowsource, con, row);
305     row->offset += left_size;
306     rasqal_row_set_rowsource(row, rowsource);
307   }
308 
309   if(raptor_sequence_join(seq1, seq2)) {
310     raptor_free_sequence(seq1);
311     seq1 = NULL;
312   }
313   raptor_free_sequence(seq2);
314 
315   con->state = 2;
316   return seq1;
317 }
318 
319 
320 static int
rasqal_union_rowsource_reset(rasqal_rowsource * rowsource,void * user_data)321 rasqal_union_rowsource_reset(rasqal_rowsource* rowsource, void *user_data)
322 {
323   rasqal_union_rowsource_context* con;
324   int rc;
325 
326   con = (rasqal_union_rowsource_context*)user_data;
327 
328   con->state = 0;
329   con->failed = 0;
330 
331   rc = rasqal_rowsource_reset(con->left);
332   if(rc)
333     return rc;
334 
335   return rasqal_rowsource_reset(con->right);
336 }
337 
338 
339 static rasqal_rowsource*
rasqal_union_rowsource_get_inner_rowsource(rasqal_rowsource * rowsource,void * user_data,int offset)340 rasqal_union_rowsource_get_inner_rowsource(rasqal_rowsource* rowsource,
341                                            void *user_data, int offset)
342 {
343   rasqal_union_rowsource_context *con;
344   con = (rasqal_union_rowsource_context*)user_data;
345 
346   if(offset == 0)
347     return con->left;
348   else if(offset == 1)
349     return con->right;
350   else
351     return NULL;
352 }
353 
354 
/*
 * Handler table for the "union" rowsource, passed to
 * rasqal_new_rowsource_from_handler() by rasqal_new_union_rowsource().
 *
 * The initializer is positional: the comment in front of each entry names
 * the handler field it fills.  The second entry (no comment) is the
 * handler's name string.  Entries left NULL are not provided by this
 * rowsource.
 */
static const rasqal_rowsource_handler rasqal_union_rowsource_handler = {
  /* .version = */ 1,
  "union",
  /* .init = */ rasqal_union_rowsource_init,
  /* .finish = */ rasqal_union_rowsource_finish,
  /* .ensure_variables = */ rasqal_union_rowsource_ensure_variables,
  /* .read_row = */ rasqal_union_rowsource_read_row,
  /* .read_all_rows = */ rasqal_union_rowsource_read_all_rows,
  /* .reset = */ rasqal_union_rowsource_reset,
  /* .set_requirements = */ NULL,
  /* .get_inner_rowsource = */ rasqal_union_rowsource_get_inner_rowsource,
  /* .set_origin = */ NULL,
};
368 
369 
370 /**
371  * rasqal_new_union_rowsource:
372  * @world: world object
373  * @query: query object
374  * @left: left (first) rowsource
375  * @right: right (second) rowsource
376  *
377  * INTERNAL - create a new UNION over two rowsources
378  *
379  * This uses the number of variables in @vt to set the rowsource size
380  * (order size is always 0) and then checks that all the rows in the
381  * sequence are the same.  If not, construction fails and NULL is
382  * returned.
383  *
384  * The @left and @right rowsources become owned by the new rowsource.
385  *
386  * Return value: new rowsource or NULL on failure
387  */
388 rasqal_rowsource*
rasqal_new_union_rowsource(rasqal_world * world,rasqal_query * query,rasqal_rowsource * left,rasqal_rowsource * right)389 rasqal_new_union_rowsource(rasqal_world *world,
390                            rasqal_query* query,
391                            rasqal_rowsource* left,
392                            rasqal_rowsource* right)
393 {
394   rasqal_union_rowsource_context* con;
395   int flags = 0;
396 
397   if(!world || !query || !left || !right)
398     goto fail;
399 
400   con = RASQAL_CALLOC(rasqal_union_rowsource_context*, 1, sizeof(*con));
401   if(!con)
402     goto fail;
403 
404   con->left = left;
405   con->right = right;
406 
407   return rasqal_new_rowsource_from_handler(world, query,
408                                            con,
409                                            &rasqal_union_rowsource_handler,
410                                            query->vars_table,
411                                            flags);
412 
413   fail:
414   if(left)
415     rasqal_free_rowsource(left);
416   if(right)
417     rasqal_free_rowsource(right);
418   return NULL;
419 }
420 
421 
422 #endif /* not STANDALONE */
423 
424 
425 
426 #ifdef STANDALONE
427 
/* one more prototype */
int main(int argc, char *argv[]);


/*
 * Left-hand test input: variables a and b, three rows.
 *
 * Every cell is written as a pair of strings whose second member is NULL
 * here.  The first line holds the variable names, each following line one
 * row, and a line of NULLs ends the table.
 * NOTE(review): the meaning of the second member of each pair is defined
 * by rasqal_new_row_sequence(), which is not visible here - confirm there.
 */
const char* const union_1_data_2x3_rows[] =
{
  /* 2 variable names and 3 rows */
  "a",   NULL, "b",   NULL,
  /* row 1 data */
  "foo", NULL, "bar", NULL,
  /* row 2 data */
  "baz", NULL, "fez", NULL,
  /* row 3 data */
  "bob", NULL, "sue", NULL,
  /* end of data */
  NULL, NULL, NULL, NULL
};


/*
 * Right-hand test input: variables b, c and d, four rows, in the same
 * layout as the left-hand table.  Variable b also appears on the left.
 */
const char* const union_2_data_3x4_rows[] =
{
  /* 3 variable names and 4 rows */
  "b",     NULL, "c",      NULL, "d",      NULL,
  /* row 1 data */
  "red",   NULL, "orange", NULL, "yellow", NULL,
  /* row 2 data */
  "blue",  NULL, "indigo", NULL, "violet", NULL,
  /* row 3 data */
  "black", NULL, "silver", NULL, "gold",   NULL,
  /* row 4 data */
  "green", NULL, "tope",   NULL, "bronze", NULL,
  /* end of data */
  NULL, NULL, NULL, NULL, NULL, NULL
};


/* the union returns all left rows followed by all right rows */
#define EXPECTED_ROWS_COUNT (3 + 4)

/* there is one duplicate variable 'b' */
#define EXPECTED_COLUMNS_COUNT (2 + 3 - 1)
/* expected union variable names, in column order */
const char* const union_result_vars[] = { "a" , "b" , "c", "d" };
469 
470 
471 int
main(int argc,char * argv[])472 main(int argc, char *argv[])
473 {
474   const char *program = rasqal_basename(argv[0]);
475   rasqal_rowsource *rowsource = NULL;
476   rasqal_rowsource *left_rs = NULL;
477   rasqal_rowsource *right_rs = NULL;
478   rasqal_world* world = NULL;
479   rasqal_query* query = NULL;
480   int count;
481   raptor_sequence* seq = NULL;
482   int failures = 0;
483   int vars_count;
484   rasqal_variables_table* vt;
485   int size;
486   int expected_count = EXPECTED_ROWS_COUNT;
487   int expected_size = EXPECTED_COLUMNS_COUNT;
488   int i;
489   raptor_sequence* vars_seq = NULL;
490 
491   world = rasqal_new_world(); rasqal_world_open(world);
492 
493   query = rasqal_new_query(world, "sparql", NULL);
494 
495   vt = query->vars_table;
496 
497   /* 2 variables and 3 rows */
498   vars_count = 2;
499   seq = rasqal_new_row_sequence(world, vt, union_1_data_2x3_rows, vars_count,
500                                 &vars_seq);
501   if(!seq) {
502     fprintf(stderr,
503             "%s: failed to create left sequence of %d vars\n", program,
504             vars_count);
505     failures++;
506     goto tidy;
507   }
508 
509   left_rs = rasqal_new_rowsequence_rowsource(world, query, vt, seq, vars_seq);
510   if(!left_rs) {
511     fprintf(stderr, "%s: failed to create left rowsource\n", program);
512     failures++;
513     goto tidy;
514   }
515   /* vars_seq and seq are now owned by left_rs */
516   vars_seq = seq = NULL;
517 
518   /* 3 variables and 4 rows */
519   vars_count = 3;
520   seq = rasqal_new_row_sequence(world, vt, union_2_data_3x4_rows, vars_count,
521                                 &vars_seq);
522   if(!seq) {
523     fprintf(stderr,
524             "%s: failed to create right sequence of %d rows\n", program,
525             vars_count);
526     failures++;
527     goto tidy;
528   }
529 
530   right_rs = rasqal_new_rowsequence_rowsource(world, query, vt, seq, vars_seq);
531   if(!right_rs) {
532     fprintf(stderr, "%s: failed to create right rowsource\n", program);
533     failures++;
534     goto tidy;
535   }
536   /* vars_seq and seq are now owned by right_rs */
537   vars_seq = seq = NULL;
538 
539   rowsource = rasqal_new_union_rowsource(world, query, left_rs, right_rs);
540   if(!rowsource) {
541     fprintf(stderr, "%s: failed to create union rowsource\n", program);
542     failures++;
543     goto tidy;
544   }
545   /* left_rs and right_rs are now owned by rowsource */
546   left_rs = right_rs = NULL;
547 
548   seq = rasqal_rowsource_read_all_rows(rowsource);
549   if(!seq) {
550     fprintf(stderr,
551             "%s: read_rows returned a NULL seq for a union rowsource\n",
552             program);
553     failures++;
554     goto tidy;
555   }
556   count = raptor_sequence_size(seq);
557   if(count != expected_count) {
558     fprintf(stderr,
559             "%s: read_rows returned %d rows for a union rowsource, expected %d\n",
560             program, count, expected_count);
561     failures++;
562     goto tidy;
563   }
564 
565   size = rasqal_rowsource_get_size(rowsource);
566   if(size != expected_size) {
567     fprintf(stderr,
568             "%s: read_rows returned %d columns (variables) for a union rowsource, expected %d\n",
569             program, size, expected_size);
570     failures++;
571     goto tidy;
572   }
573   for(i = 0; i < expected_size; i++) {
574     rasqal_variable* v;
575     const char* name = NULL;
576     const char *expected_name = union_result_vars[i];
577 
578     v = rasqal_rowsource_get_variable_by_offset(rowsource, i);
579     if(!v) {
580       fprintf(stderr,
581             "%s: read_rows had NULL column (variable) #%d expected %s\n",
582               program, i, expected_name);
583       failures++;
584       goto tidy;
585     }
586     name = RASQAL_GOOD_CAST(const char*, v->name);
587     if(strcmp(name, expected_name)) {
588       fprintf(stderr,
589             "%s: read_rows returned column (variable) #%d %s but expected %s\n",
590               program, i, name, expected_name);
591       failures++;
592       goto tidy;
593     }
594   }
595 
596 #ifdef RASQAL_DEBUG
597   rasqal_rowsource_print_row_sequence(rowsource, seq, DEBUG_FH);
598 #endif
599 
600   tidy:
601   if(seq)
602     raptor_free_sequence(seq);
603   if(left_rs)
604     rasqal_free_rowsource(left_rs);
605   if(right_rs)
606     rasqal_free_rowsource(right_rs);
607   if(rowsource)
608     rasqal_free_rowsource(rowsource);
609   if(query)
610     rasqal_free_query(query);
611   if(world)
612     rasqal_free_world(world);
613 
614   return failures;
615 }
616 
617 #endif /* STANDALONE */
618