1 /* -*- Mode: c; c-basic-offset: 2 -*-
2 *
3 * rasqal_rowsource_groupby.c - Rasqal GROUP BY and HAVING rowsource class
4 *
5 * Copyright (C) 2010, David Beckett http://www.dajobe.org/
6 *
7 * This package is Free Software and part of Redland http://librdf.org/
8 *
9 * It is licensed under the following three licenses as alternatives:
10 * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
11 * 2. GNU General Public License (GPL) V2 or any newer version
12 * 3. Apache License, V2.0 or any newer version
13 *
14 * You may not use this file except in compliance with at least one of
15 * the above three licenses.
16 *
17 * See LICENSE.html or LICENSE.txt at the top of this package for the
18 * complete terms and further detail along with the license texts for
19 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
20 *
21 */
22
23
24 #ifdef HAVE_CONFIG_H
25 #include <rasqal_config.h>
26 #endif
27
28 #ifdef WIN32
29 #include <win32_rasqal_config.h>
30 #endif
31
32 #include <stdio.h>
33 #include <string.h>
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37
38 #include <raptor.h>
39
40 #include "rasqal.h"
41 #include "rasqal_internal.h"
42
43
44 #define DEBUG_FH stderr
45
46
47 #ifndef STANDALONE
48
49
50
51 /**
52 * rasqal_groupby_rowsource_context:
53 *
54 * INTERNAL - GROUP BY rowsource context
55 *
56 * Structure for handing grouping an input rowsource by a sequence of
57 * #rasqal_expression - in SPARQL, the GROUP BY exprList.
58 *
59 */
60 typedef struct
61 {
62 /* inner rowsource to filter */
63 rasqal_rowsource *rowsource;
64
65 /* group expression list */
66 raptor_sequence* exprs_seq;
67
68 /* size of above list: can be 0 if @exprs_seq is NULL too */
69 int exprs_seq_size;
70
71 /* last group ID assigned */
72 int group_id;
73
74 /* non-0 if input has been processed */
75 int processed;
76
77 /* avltree for grouping.
78 * the tree nodes are #rasqal_groupby_tree_node objects
79 */
80 raptor_avltree* tree;
81
82 /* rasqal_literal_compare() flags */
83 int compare_flags;
84
85 /* iterator into tree above */
86 raptor_avltree_iterator* group_iterator;
87 /* index into sequence of rows at current avltree node */
88 int group_row_index;
89
90 /* output row offset */
91 int offset;
92 } rasqal_groupby_rowsource_context;
93
94
95 /**
96 * rasqal_groupby_tree_node:
97 *
98 * INTERNAL - Node structure for grouping rows by a sequence of literals
99 *
100 * Each node contains the data for one group
101 * [lit, lit, ...] -> [ row, row, row, ... ]
102 *
103 * key: raptor_sequence* of rasqal_literal*
104 * value: raptor_sequence* of rasqal_row*
105 *
106 * Plus an integer group ID identifier.
107 *
108 */
109 typedef struct {
110 rasqal_groupby_rowsource_context* con;
111
112 /* Integer ID of this group */
113 int group_id;
114
115 /* Key of this group (seq of literals) */
116 raptor_sequence* literals;
117
118 /* Value of this group (seq of rows) */
119 raptor_sequence* rows;
120
121 } rasqal_groupby_tree_node;
122
123
124 static void
rasqal_free_groupby_tree_node(rasqal_groupby_tree_node * node)125 rasqal_free_groupby_tree_node(rasqal_groupby_tree_node* node)
126 {
127 if(!node)
128 return;
129
130 if(node->literals)
131 raptor_free_sequence(node->literals);
132
133 if(node->rows)
134 raptor_free_sequence(node->rows);
135
136 RASQAL_FREE(rasqal_groupby_tree_node, node);
137 }
138
139
140 static int
rasqal_rowsource_groupby_tree_print_node(void * object,FILE * fh)141 rasqal_rowsource_groupby_tree_print_node(void *object, FILE *fh)
142 {
143 rasqal_groupby_tree_node* node = (rasqal_groupby_tree_node*)object;
144
145 fputs("Group\n Key Sequence of literals: ", fh);
146 if(node->literals)
147 /* sequence of literals */
148 raptor_sequence_print(node->literals, fh);
149 else
150 fputs("None", fh);
151
152 fputs("\n Value Sequence of rows:\n", fh);
153 if(node->rows) {
154 int i;
155 int size = raptor_sequence_size(node->rows);
156
157 /* sequence of rows */
158 for(i = 0; i < size; i++) {
159 rasqal_row* row = (rasqal_row*)raptor_sequence_get_at(node->rows, i);
160
161 fprintf(fh, " Row %d: ", i);
162 rasqal_row_print(row, fh);
163 fputc('\n', fh);
164 }
165 } else
166 fputs("None\n", fh);
167
168 return 0;
169 }
170
171
172 static int
rasqal_rowsource_groupby_literal_sequence_compare(const void * a,const void * b)173 rasqal_rowsource_groupby_literal_sequence_compare(const void *a, const void *b)
174 {
175 rasqal_groupby_rowsource_context* con;
176 rasqal_groupby_tree_node* node_a = (rasqal_groupby_tree_node*)a;
177 rasqal_groupby_tree_node* node_b = (rasqal_groupby_tree_node*)b;
178
179 con = node_a->con;
180
181 return rasqal_literal_sequence_compare(con->compare_flags,
182 node_a->literals, node_b->literals);
183 }
184
185
186 static int
rasqal_groupby_rowsource_init(rasqal_rowsource * rowsource,void * user_data)187 rasqal_groupby_rowsource_init(rasqal_rowsource* rowsource, void *user_data)
188 {
189 rasqal_groupby_rowsource_context* con;
190 con = (rasqal_groupby_rowsource_context*)user_data;
191
192 con->group_id = -1;
193
194 con->compare_flags = RASQAL_COMPARE_URI;
195
196 con->offset = 0;
197 return 0;
198 }
199
200
201 static int
rasqal_groupby_rowsource_finish(rasqal_rowsource * rowsource,void * user_data)202 rasqal_groupby_rowsource_finish(rasqal_rowsource* rowsource, void *user_data)
203 {
204 rasqal_groupby_rowsource_context* con;
205 con = (rasqal_groupby_rowsource_context*)user_data;
206
207 if(con->rowsource)
208 rasqal_free_rowsource(con->rowsource);
209
210 if(con->exprs_seq)
211 raptor_free_sequence(con->exprs_seq);
212
213 if(con->tree)
214 raptor_free_avltree(con->tree);
215
216 if(con->group_iterator)
217 raptor_free_avltree_iterator(con->group_iterator);
218
219 RASQAL_FREE(rasqal_groupby_rowsource_context, con);
220
221 return 0;
222 }
223
224
225 static int
rasqal_groupby_rowsource_ensure_variables(rasqal_rowsource * rowsource,void * user_data)226 rasqal_groupby_rowsource_ensure_variables(rasqal_rowsource* rowsource,
227 void *user_data)
228 {
229 rasqal_groupby_rowsource_context* con;
230
231 con = (rasqal_groupby_rowsource_context*)user_data;
232
233 if(rasqal_rowsource_ensure_variables(con->rowsource))
234 return 1;
235
236 rowsource->size = 0;
237 if(rasqal_rowsource_copy_variables(rowsource, con->rowsource))
238 return 1;
239
240 return 0;
241 }
242
243
244 static int
rasqal_groupby_rowsource_process(rasqal_rowsource * rowsource,rasqal_groupby_rowsource_context * con)245 rasqal_groupby_rowsource_process(rasqal_rowsource* rowsource,
246 rasqal_groupby_rowsource_context* con)
247 {
248 /* already processed */
249 if(con->processed)
250 return 0;
251
252 con->processed = 1;
253
254 /* Empty expression list - no need to read rows */
255 if(!con->exprs_seq || !con->exprs_seq_size) {
256 con->group_id++;
257 return 0;
258 }
259
260
261 con->tree = raptor_new_avltree(rasqal_rowsource_groupby_literal_sequence_compare,
262 (raptor_data_free_handler)rasqal_free_groupby_tree_node,
263 /* flags */ 0);
264
265 if(!con->tree)
266 return 1;
267
268 raptor_avltree_set_print_handler(con->tree,
269 rasqal_rowsource_groupby_tree_print_node);
270
271
272 while(1) {
273 rasqal_row* row;
274
275 row = rasqal_rowsource_read_row(con->rowsource);
276 if(!row)
277 break;
278
279 rasqal_row_bind_variables(row, rowsource->query->vars_table);
280
281 if(con->exprs_seq) {
282 raptor_sequence* literal_seq;
283 rasqal_groupby_tree_node key;
284 rasqal_groupby_tree_node* node;
285
286 literal_seq = rasqal_expression_sequence_evaluate(rowsource->query,
287 con->exprs_seq,
288 /* ignore_errors */ 0,
289 /* error_p */ NULL);
290
291 if(!literal_seq) {
292 /* FIXME - what to do on errors? */
293 continue;
294 }
295
296 memset(&key, '\0', sizeof(key));
297 key.con = con;
298 key.literals = literal_seq;
299
300 node = (rasqal_groupby_tree_node*)raptor_avltree_search(con->tree, &key);
301 if(!node) {
302 /* New Group */
303 node = RASQAL_CALLOC(rasqal_groupby_tree_node*, 1, sizeof(*node));
304 if(!node) {
305 raptor_free_sequence(literal_seq);
306 return 1;
307 }
308
309 node->con = con;
310 node->group_id = ++con->group_id;
311
312 /* node now owns literal_seq */
313 node->literals = literal_seq;
314
315 node->rows = raptor_new_sequence((raptor_data_free_handler)rasqal_free_row, (raptor_data_print_handler)rasqal_row_print);
316 if(!node->rows) {
317 rasqal_free_groupby_tree_node(node);
318 return 1;
319 }
320
321 /* after this, node is owned by con->tree */
322 raptor_avltree_add(con->tree, node);
323 } else
324 raptor_free_sequence(literal_seq);
325
326 row->group_id = node->group_id;
327
328 /* after this, node owns the row */
329 raptor_sequence_push(node->rows, row);
330
331 }
332 }
333
334 #ifdef RASQAL_DEBUG
335 fputs("Grouping ", DEBUG_FH);
336 raptor_avltree_print(con->tree, DEBUG_FH);
337 fputs("\n", DEBUG_FH);
338 #endif
339
340 if(raptor_avltree_size(con->tree))
341 con->group_iterator = raptor_new_avltree_iterator(con->tree,
342 NULL, NULL,
343 1);
344
345 con->group_row_index = 0;
346
347 con->offset = 0;
348
349 return 0;
350 }
351
352
353 static rasqal_row*
rasqal_groupby_rowsource_read_row(rasqal_rowsource * rowsource,void * user_data)354 rasqal_groupby_rowsource_read_row(rasqal_rowsource* rowsource, void *user_data)
355 {
356 rasqal_groupby_rowsource_context* con;
357 rasqal_row *row = NULL;
358
359 con = (rasqal_groupby_rowsource_context*)user_data;
360
361 /* ensure we have stored grouped rows */
362 if(rasqal_groupby_rowsource_process(rowsource, con))
363 return NULL;
364
365 if(con->tree && con->group_iterator) {
366 rasqal_groupby_tree_node* node = NULL;
367
368 /* Rows were grouped so iterate through grouped rows */
369 while(1) {
370 node = (rasqal_groupby_tree_node*)raptor_avltree_iterator_get(con->group_iterator);
371 if(!node) {
372 /* No more nodes. finished last group and last row */
373 raptor_free_avltree_iterator(con->group_iterator);
374 con->group_iterator = NULL;
375
376 raptor_free_avltree(con->tree);
377 con->tree = NULL;
378
379 /* row = NULL is already set */
380 break;
381 }
382
383 /* removes row from sequence and this code now owns the reference */
384 row = (rasqal_row*)raptor_sequence_delete_at(node->rows,
385 con->group_row_index++);
386 if(row) {
387 /* Bind the values in the input row to the variables in the table */
388 rasqal_row_bind_variables(row, rowsource->query->vars_table);
389 break;
390 }
391
392 /* End of sequence so reset row sequence index and advance iterator */
393 con->group_row_index = 0;
394
395 if(raptor_avltree_iterator_next(con->group_iterator))
396 break;
397 }
398
399 if(node && row)
400 row->group_id = node->group_id;
401
402 } else if(con->tree && !con->group_iterator) {
403 /* we found inner rowsource with no rows - generate 1 row */
404 if(!con->offset) {
405 row = rasqal_new_row(rowsource);
406
407 if(row)
408 row->group_id = 0;
409 }
410 } else {
411 /* no grouping: just pass rows through all in one group */
412 row = rasqal_rowsource_read_row(con->rowsource);
413
414 if(row)
415 row->group_id = con->group_id;
416 }
417
418 if(row)
419 row->offset = con->offset++;
420
421 return row;
422 }
423
424
425 static int
rasqal_groupby_rowsource_reset(rasqal_rowsource * rowsource,void * user_data)426 rasqal_groupby_rowsource_reset(rasqal_rowsource* rowsource, void *user_data)
427 {
428 return 0;
429 }
430
431
432 static rasqal_rowsource*
rasqal_groupby_rowsource_get_inner_rowsource(rasqal_rowsource * rowsource,void * user_data,int offset)433 rasqal_groupby_rowsource_get_inner_rowsource(rasqal_rowsource* rowsource,
434 void *user_data, int offset)
435 {
436 rasqal_groupby_rowsource_context *con;
437 con = (rasqal_groupby_rowsource_context*)user_data;
438
439 if(offset == 0)
440 return con->rowsource;
441
442 return NULL;
443 }
444
445
446 static const rasqal_rowsource_handler rasqal_groupby_rowsource_handler = {
447 /* .version = */ 1,
448 "groupby",
449 /* .init = */ rasqal_groupby_rowsource_init,
450 /* .finish = */ rasqal_groupby_rowsource_finish,
451 /* .ensure_variables = */ rasqal_groupby_rowsource_ensure_variables,
452 /* .read_row = */ rasqal_groupby_rowsource_read_row,
453 /* .read_all_rows = */ NULL,
454 /* .reset = */ rasqal_groupby_rowsource_reset,
455 /* .set_requirements = */ NULL,
456 /* .get_inner_rowsource = */ rasqal_groupby_rowsource_get_inner_rowsource,
457 /* .set_origin = */ NULL,
458 };
459
460
461 /**
462 * rasqal_new_groupby_rowsource:
463 * @world: world object
464 * @query: query object
465 * @rowsource: input rowsource
466 * @exprs_seq: sequence of group by expressions
467 *
468 * INTERNAL - create a new group by rowsource
469 *
470 * the @rowsource becomes owned by the new rowsource
471 *
472 * Return value: new rowsource or NULL on failure
473 */
474 rasqal_rowsource*
rasqal_new_groupby_rowsource(rasqal_world * world,rasqal_query * query,rasqal_rowsource * rowsource,raptor_sequence * exprs_seq)475 rasqal_new_groupby_rowsource(rasqal_world *world,
476 rasqal_query* query,
477 rasqal_rowsource* rowsource,
478 raptor_sequence* exprs_seq)
479 {
480 rasqal_groupby_rowsource_context* con;
481 int flags = 0;
482
483 if(!world || !query)
484 return NULL;
485
486 con = RASQAL_CALLOC(rasqal_groupby_rowsource_context*, 1, sizeof(*con));
487 if(!con)
488 return NULL;
489
490 con->rowsource = rowsource;
491 con->exprs_seq_size = 0;
492
493 if(exprs_seq) {
494 con->exprs_seq = rasqal_expression_copy_expression_sequence(exprs_seq);
495
496 if(!con->exprs_seq)
497 goto fail;
498
499 con->exprs_seq_size = raptor_sequence_size(exprs_seq);
500 }
501
502 return rasqal_new_rowsource_from_handler(world, query,
503 con,
504 &rasqal_groupby_rowsource_handler,
505 query->vars_table,
506 flags);
507
508 fail:
509
510 if(rowsource)
511 rasqal_free_rowsource(rowsource);
512 if(exprs_seq)
513 raptor_free_sequence(exprs_seq);
514 if(con)
515 RASQAL_FREE(rasqal_groupby_rowsource_context*, con);
516
517 return NULL;
518 }
519
520
521 #endif /* not STANDALONE */
522
523
524
525 #ifdef STANDALONE
526
/* one more prototype */
int main(int argc, char *argv[]);


/*
 * Test 2 exercises the following example from the SPARQL 1.1 Query Draft;
 * Test 1 reads the same rows without grouping expressions.

"
For example, given a
solution sequence S, ( {?x→2, ?y→3}, {?x→2, ?y→5}, {?x→6, ?y→7} ),

Group((?x), S) = {
  (2) → ( {?x→2, ?y→3}, {?x→2, ?y→5} ),
  (6) → ( {?x→6, ?y→7} )
}
"
*/


/* number of entries in the test table */
#define GROUP_TESTS_COUNT 4

#define MAX_TEST_GROUPS 100
/* capacity of the per-test list of GROUP BY variable names */
#define MAX_TEST_VARS 5
550
/* Test 0: variables x and y with no data rows */
static const char* const data_xy_no_rows[] =
{
  /* variable names */
  "x", NULL,
  "y", NULL,
  /* end of data */
  NULL, NULL,
  NULL, NULL,
};
558
/* Test 1 and Test 2: variables x and y with 3 data rows */
static const char* const data_xy_3_rows[] =
{
  /* variable names */
  "x", NULL,   "y", NULL,
  /* row 1 */
  "2", NULL,   "3", NULL,
  /* row 2 */
  "2", NULL,   "5", NULL,
  /* row 3 */
  "6", NULL,   "7", NULL,
  /* end of data */
  NULL, NULL,  NULL, NULL,
};
573
574
/* Test 3: variables name, state and year with 100 data rows */
static const char* const data_us_senators_100_rows[] =
{
  /* variable names */
  "name", NULL, "state", NULL, "year", NULL,
  /* data rows: name, state, year */
  "Al", NULL, "Minnesota", NULL, "1951", NULL,
  "Amy", NULL, "Minnesota", NULL, "1960", NULL,
  "Arlen", NULL, "Pennsylvania", NULL, "1930", NULL,
  "Barbara", NULL, "California", NULL, "1940", NULL,
  "Barbara", NULL, "Maryland", NULL, "1936", NULL,
  "Ben", NULL, "Maryland", NULL, "1943", NULL,
  "Ben", NULL, "Nebraska", NULL, "1941", NULL,
  "Bernie", NULL, "Vermont", NULL, "1941", NULL,
  "Bill", NULL, "Florida", NULL, "1942", NULL,
  "Blanche", NULL, "Arkansas", NULL, "1960", NULL,
  "Bob", NULL, "Utah", NULL, "1933", NULL,
  "Bob", NULL, "Pennsylvania", NULL, "1960", NULL,
  "Bob", NULL, "Tennessee", NULL, "1952", NULL,
  "Bob", NULL, "New Jersey", NULL, "1954", NULL,
  "Byron", NULL, "North Dakota", NULL, "1942", NULL,
  "Carl", NULL, "Michigan", NULL, "1934", NULL,
  "Carte", NULL, "West Virginia", NULL, "1974", NULL,
  "Christopher", NULL, "Connecticut", NULL, "1944", NULL,
  "Chuck", NULL, "Iowa", NULL, "1933", NULL,
  "Chuck", NULL, "New York", NULL, "1950", NULL,
  "Claire", NULL, "Missouri", NULL, "1953", NULL,
  "Daniel", NULL, "Hawaii", NULL, "1924", NULL,
  "Daniel", NULL, "Hawaii", NULL, "1924", NULL,
  "David", NULL, "Louisiana", NULL, "1961", NULL,
  "Debbie", NULL, "Michigan", NULL, "1950", NULL,
  "Dianne", NULL, "California", NULL, "1933", NULL,
  "Dick", NULL, "Illinois", NULL, "1944", NULL,
  "Evan", NULL, "Indiana", NULL, "1955", NULL,
  "Frank", NULL, "New Jersey", NULL, "1924", NULL,
  "George", NULL, "Florida", NULL, "1969", NULL,
  "George", NULL, "Ohio", NULL, "1936", NULL,
  "Harry", NULL, "Nevada", NULL, "1939", NULL,
  "Herb", NULL, "Wisconsin", NULL, "1935", NULL,
  "Jack", NULL, "Rhode Island", NULL, "1949", NULL,
  "Jay", NULL, "West Virginia", NULL, "1937", NULL,
  "Jeanne", NULL, "New Hampshire", NULL, "1947", NULL,
  "Jeff", NULL, "New Mexico", NULL, "1943", NULL,
  "Jeff", NULL, "Oregon", NULL, "1956", NULL,
  "Jeff", NULL, "Alabama", NULL, "1946", NULL,
  "Jim", NULL, "Kentucky", NULL, "1931", NULL,
  "Jim", NULL, "South Carolina", NULL, "1951", NULL,
  "Jim", NULL, "Oklahoma", NULL, "1934", NULL,
  "Jim", NULL, "Idaho", NULL, "1943", NULL,
  "Jim", NULL, "Virginia", NULL, "1946", NULL,
  "Joe", NULL, "Connecticut", NULL, "1942", NULL,
  "John", NULL, "Wyoming", NULL, "1952", NULL,
  "John", NULL, "Texas", NULL, "1952", NULL,
  "John", NULL, "Nevada", NULL, "1958", NULL,
  "John", NULL, "Massachusetts", NULL, "1943", NULL,
  "John", NULL, "Arizona", NULL, "1936", NULL,
  "John", NULL, "South Dakota", NULL, "1961", NULL,
  "Johnny", NULL, "Georgia", NULL, "1944", NULL,
  "Jon", NULL, "Arizona", NULL, "1942", NULL,
  "Jon", NULL, "Montana", NULL, "1956", NULL,
  "Judd", NULL, "New Hampshire", NULL, "1947", NULL,
  "Kay", NULL, "Texas", NULL, "1943", NULL,
  "Kay", NULL, "North Carolina", NULL, "1953", NULL,
  "Kent", NULL, "North Dakota", NULL, "1948", NULL,
  "Kirsten", NULL, "New York", NULL, "1966", NULL,
  "Kit", NULL, "Missouri", NULL, "1939", NULL,
  "Lamar", NULL, "Tennessee", NULL, "1940", NULL,
  "Lindsey", NULL, "South Carolina", NULL, "1955", NULL,
  "Lisa", NULL, "Alaska", NULL, "1957", NULL,
  "Maria", NULL, "Washington", NULL, "1958", NULL,
  "Mark", NULL, "Alaska", NULL, "1962", NULL,
  "Mark", NULL, "Arkansas", NULL, "1963", NULL,
  "Mark", NULL, "Colorado", NULL, "1950", NULL,
  "Mark", NULL, "Virginia", NULL, "1954", NULL,
  "Mary", NULL, "Louisiana", NULL, "1955", NULL,
  "Max", NULL, "Montana", NULL, "1941", NULL,
  "Michael", NULL, "Colorado", NULL, "1964", NULL,
  "Mike", NULL, "Idaho", NULL, "1951", NULL,
  "Mike", NULL, "Wyoming", NULL, "1944", NULL,
  "Mike", NULL, "Nebraska", NULL, "1950", NULL,
  "Mitch", NULL, "Kentucky", NULL, "1942", NULL,
  "Olympia", NULL, "Maine", NULL, "1947", NULL,
  "Orrin", NULL, "Utah", NULL, "1934", NULL,
  "Pat", NULL, "Kansas", NULL, "1936", NULL,
  "Patrick", NULL, "Vermont", NULL, "1940", NULL,
  "Patty", NULL, "Washington", NULL, "1950", NULL,
  "Richard", NULL, "North Carolina", NULL, "1955", NULL,
  "Richard", NULL, "Indiana", NULL, "1932", NULL,
  "Richard", NULL, "Alabama", NULL, "1934", NULL,
  "Roger", NULL, "Mississippi", NULL, "1951", NULL,
  "Roland", NULL, "Illinois", NULL, "1937", NULL,
  "Ron", NULL, "Oregon", NULL, "1949", NULL,
  "Russ", NULL, "Wisconsin", NULL, "1953", NULL,
  "Sam", NULL, "Kansas", NULL, "1956", NULL,
  "Saxby", NULL, "Georgia", NULL, "1943", NULL,
  "Scott", NULL, "Massachusetts", NULL, "1959", NULL,
  "Sheldon", NULL, "Rhode Island", NULL, "1955", NULL,
  "Sherrod", NULL, "Ohio", NULL, "1952", NULL,
  "Susan", NULL, "Maine", NULL, "1952", NULL,
  "Ted", NULL, "Delaware", NULL, "1939", NULL,
  "Thad", NULL, "Mississippi", NULL, "1937", NULL,
  "Tim", NULL, "South Dakota", NULL, "1946", NULL,
  "Tom", NULL, "Delaware", NULL, "1947", NULL,
  "Tom", NULL, "Oklahoma", NULL, "1948", NULL,
  "Tom", NULL, "Iowa", NULL, "1939", NULL,
  "Tom", NULL, "New Mexico", NULL, "1948", NULL,
  /* end of data */
  NULL, NULL, NULL, NULL, NULL, NULL,
};
684
685
/* Expected group ID of each output row, per test */

/* Test 0: the single generated row */
static const int test0_groupids[] = { 0 };

/* Test 1: all three rows in one group */
static const int test1_groupids[] = { 0, 0, 0 };

/* Test 2: first two rows share a group, third row is in the next */
static const int test2_groupids[] = { 0, 0, 1 };
701
702
/* Test 3: expected group ID of each output row.  Rows come out in Raptor
 * AVL tree order, i.e. sorted by the GROUP BY expression list.
 */
static const int results_us_senators_97_groups[] =
{
  21, 21, 27,  2, 38, 79, 10, 18, 24, 15,
  40, 74, 80, 31,  4, 29, 47, 75, 33, 82,
  92, 30, 57, 91, 96,  3, 58, 76,  6,  7,
  67,  8, 14, 43, 50, 72,  5, 35, 41, 46,
  53, 86, 17, 25, 49, 70, 37, 42, 93, 34,
  52, 73, 94, 55, 95, 95, 32, 83, 19, 23,
  64, 71, 77,  0, 39, 69, 81, 12, 44, 44,
  89, 90, 20, 54, 84, 13, 65, 26, 59, 66,
  78, 88, 36, 51, 85, 60, 45, 61, 87,  1,
   9, 11, 22, 48, 62, 63, 68, 56, 28, 16
};
706
707
708
709 static const struct {
710 int vars;
711 int rows;
712 int ngroups;
713 const char* const *data;
714 const int *group_ids;
715 const char* const expr_vars[MAX_TEST_VARS];
716 } test_data[GROUP_TESTS_COUNT] = {
717 /* Test 0: No GROUP BY : 1 group expected with NULL values */
718 {2, 1, 1, data_xy_no_rows, test0_groupids, { "x", NULL } },
719
720 /* Test 1: No GROUP BY : 1 group expected */
721 {2, 3, 1, data_xy_3_rows, test1_groupids, { NULL } },
722
723 /* Test 2: GROUP BY ?x : 2 groups expected */
724 {2, 3, 2, data_xy_3_rows, test2_groupids, { "x", NULL } },
725
726 /* Test 3: GROUP BY ?year, ?name : 97 groups expected */
727 {3, 100, 97, data_us_senators_100_rows, results_us_senators_97_groups, { "year", "name", NULL } },
728
729 };
730
731
732
733 int
main(int argc,char * argv[])734 main(int argc, char *argv[])
735 {
736 const char *program = rasqal_basename(argv[0]);
737 rasqal_rowsource *rowsource = NULL;
738 rasqal_world* world = NULL;
739 rasqal_query* query = NULL;
740 raptor_sequence* row_seq = NULL;
741 raptor_sequence* exprs_seq = NULL;
742 int failures = 0;
743 rasqal_variables_table* vt;
744 rasqal_rowsource *input_rs = NULL;
745 int vars_count;
746 raptor_sequence* vars_seq = NULL;
747 int test_id;
748
749 world = rasqal_new_world();
750 if(!world || rasqal_world_open(world)) {
751 fprintf(stderr, "%s: rasqal_world init failed\n", program);
752 return(1);
753 }
754
755 query = rasqal_new_query(world, "sparql", NULL);
756
757 vt = query->vars_table;
758
759 for(test_id = 0; test_id < GROUP_TESTS_COUNT; test_id++) {
760 int expected_rows_count = test_data[test_id].rows;
761 int expected_vars_count = test_data[test_id].vars;
762 const int* expected_group_ids = test_data[test_id].group_ids;
763 int expected_ngroups = test_data[test_id].ngroups;
764 raptor_sequence* seq = NULL;
765 int count;
766 int size;
767 int i;
768 int groups_counted;
769 int last_group_id;
770
771 vars_count = expected_vars_count;
772 row_seq = rasqal_new_row_sequence(world, vt, test_data[test_id].data,
773 vars_count, &vars_seq);
774 if(row_seq) {
775 input_rs = rasqal_new_rowsequence_rowsource(world, query, vt,
776 row_seq, vars_seq);
777 /* vars_seq and row_seq are now owned by input_rs */
778 vars_seq = row_seq = NULL;
779 }
780 if(!input_rs) {
781 fprintf(stderr, "%s: failed to create rowsequence rowsource\n", program);
782 failures++;
783 goto tidy;
784 }
785
786
787 exprs_seq = raptor_new_sequence((raptor_data_free_handler)rasqal_free_expression,
788 (raptor_data_print_handler)rasqal_expression_print);
789
790 if(test_data[test_id].expr_vars[0] != NULL) {
791 int vindex;
792 const unsigned char* var_name;
793
794 for(vindex = 0;
795 (var_name = RASQAL_GOOD_CAST(const unsigned char*, test_data[test_id].expr_vars[vindex] ));
796 vindex++) {
797 rasqal_variable* v;
798 rasqal_literal *l = NULL;
799 rasqal_expression* e = NULL;
800
801 v = rasqal_variables_table_get_by_name(vt, RASQAL_VARIABLE_TYPE_NORMAL,
802 var_name);
803 /* returns SHARED pointer to variable */
804 if(v) {
805 v = rasqal_new_variable_from_variable(v);
806 l = rasqal_new_variable_literal(world, v);
807 }
808
809 if(l)
810 e = rasqal_new_literal_expression(world, l);
811
812 if(e)
813 raptor_sequence_push(exprs_seq, e);
814 else {
815 fprintf(stderr, "%s: failed to create variable %s\n", program,
816 RASQAL_GOOD_CAST(const char*, var_name));
817 failures++;
818 goto tidy;
819 }
820
821 }
822 }
823
824 rowsource = rasqal_new_groupby_rowsource(world, query, input_rs, exprs_seq);
825 /* input_rs is now owned by rowsource */
826 input_rs = NULL;
827
828 if(!rowsource) {
829 fprintf(stderr, "%s: failed to create groupby rowsource\n", program);
830 failures++;
831 goto tidy;
832 }
833
834 seq = rasqal_rowsource_read_all_rows(rowsource);
835 if(!seq) {
836 fprintf(stderr,
837 "%s: test %d rasqal_rowsource_read_all_rows() returned a NULL seq for a groupby rowsource\n",
838 program, test_id);
839 failures++;
840 goto tidy;
841 }
842 count = raptor_sequence_size(seq);
843 if(count != expected_rows_count) {
844 fprintf(stderr,
845 "%s: test %d rasqal_rowsource_read_all_rows() returned %d rows for a groupby rowsource, expected %d\n",
846 program, test_id, count, expected_rows_count);
847 failures++;
848 goto tidy;
849 }
850
851 size = rasqal_rowsource_get_size(rowsource);
852 if(size != expected_vars_count) {
853 fprintf(stderr,
854 "%s: test %d rasqal_rowsource_get_size() returned %d columns (variables) for a groupby rowsource, expected %d\n",
855 program, test_id, size, expected_vars_count);
856 failures++;
857 goto tidy;
858 }
859
860 groups_counted = 0;
861 last_group_id = -1;
862 for(i = 0; i < count; i++) {
863 rasqal_row* row = (rasqal_row*)raptor_sequence_get_at(seq, i);
864
865 if(row->group_id != last_group_id) {
866 groups_counted++;
867 last_group_id = row->group_id;
868 }
869
870 if(row->group_id != expected_group_ids[i]) {
871 fprintf(stderr, "%s: test %d row #%d has group_id %d, expected %d\n",
872 program, test_id, i, row->group_id, expected_group_ids[i]);
873 failures++;
874 goto tidy;
875 }
876
877 }
878
879 if(groups_counted != expected_ngroups) {
880 fprintf(stderr, "%s: test %d returnd %d groups, expected %d\n",
881 program, test_id, groups_counted, expected_ngroups);
882 failures++;
883 goto tidy;
884 }
885
886 #ifdef RASQAL_DEBUG
887 rasqal_rowsource_print_row_sequence(rowsource, seq, stderr);
888 #endif
889
890 raptor_free_sequence(seq); seq = NULL;
891
892 rasqal_free_rowsource(rowsource); rowsource = NULL;
893
894 if(exprs_seq)
895 raptor_free_sequence(exprs_seq);
896 exprs_seq = NULL;
897 }
898
899 tidy:
900 if(exprs_seq)
901 raptor_free_sequence(exprs_seq);
902 if(rowsource)
903 rasqal_free_rowsource(rowsource);
904 if(input_rs)
905 rasqal_free_rowsource(input_rs);
906 if(query)
907 rasqal_free_query(query);
908 if(world)
909 rasqal_free_world(world);
910
911 return failures;
912 }
913
914 #endif /* STANDALONE */
915