1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * rasqal_rowsource_groupby.c - Rasqal GROUP BY and HAVING rowsource class
4  *
5  * Copyright (C) 2010, David Beckett http://www.dajobe.org/
6  *
7  * This package is Free Software and part of Redland http://librdf.org/
8  *
9  * It is licensed under the following three licenses as alternatives:
10  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
11  *   2. GNU General Public License (GPL) V2 or any newer version
12  *   3. Apache License, V2.0 or any newer version
13  *
14  * You may not use this file except in compliance with at least one of
15  * the above three licenses.
16  *
17  * See LICENSE.html or LICENSE.txt at the top of this package for the
18  * complete terms and further detail along with the license texts for
19  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
20  *
21  */
22 
23 
24 #ifdef HAVE_CONFIG_H
25 #include <rasqal_config.h>
26 #endif
27 
28 #ifdef WIN32
29 #include <win32_rasqal_config.h>
30 #endif
31 
32 #include <stdio.h>
33 #include <string.h>
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 
38 #include <raptor.h>
39 
40 #include "rasqal.h"
41 #include "rasqal_internal.h"
42 
43 
44 #define DEBUG_FH stderr
45 
46 
47 #ifndef STANDALONE
48 
49 
50 
51 /**
52  * rasqal_groupby_rowsource_context:
53  *
54  * INTERNAL - GROUP BY rowsource context
55  *
56  * Structure for handing grouping an input rowsource by a sequence of
57  * #rasqal_expression - in SPARQL, the GROUP BY exprList.
58  *
59  */
60 typedef struct
61 {
62   /* inner rowsource to filter */
63   rasqal_rowsource *rowsource;
64 
65   /* group expression list */
66   raptor_sequence* exprs_seq;
67 
68   /* size of above list: can be 0 if @exprs_seq is NULL too */
69   int exprs_seq_size;
70 
71   /* last group ID assigned */
72   int group_id;
73 
74   /* non-0 if input has been processed */
75   int processed;
76 
77   /* avltree for grouping.
78    * the tree nodes are #rasqal_groupby_tree_node objects
79    */
80   raptor_avltree* tree;
81 
82   /* rasqal_literal_compare() flags */
83   int compare_flags;
84 
85   /* iterator into tree above */
86   raptor_avltree_iterator* group_iterator;
87   /* index into sequence of rows at current avltree node */
88   int group_row_index;
89 
90   /* output row offset */
91   int offset;
92 } rasqal_groupby_rowsource_context;
93 
94 
95 /**
96  * rasqal_groupby_tree_node:
97  *
98  * INTERNAL - Node structure for grouping rows by a sequence of literals
99  *
100  * Each node contains the data for one group
101  *    [lit, lit, ...] -> [ row, row, row, ... ]
102  *
103  *  key: raptor_sequence* of rasqal_literal*
104  *  value: raptor_sequence* of rasqal_row*
105  *
106  * Plus an integer group ID identifier.
107  *
108  */
109 typedef struct {
110   rasqal_groupby_rowsource_context* con;
111 
112   /* Integer ID of this group */
113   int group_id;
114 
115   /* Key of this group (seq of literals) */
116   raptor_sequence* literals;
117 
118   /* Value of this group (seq of rows) */
119   raptor_sequence* rows;
120 
121 } rasqal_groupby_tree_node;
122 
123 
124 static void
rasqal_free_groupby_tree_node(rasqal_groupby_tree_node * node)125 rasqal_free_groupby_tree_node(rasqal_groupby_tree_node* node)
126 {
127   if(!node)
128     return;
129 
130   if(node->literals)
131     raptor_free_sequence(node->literals);
132 
133   if(node->rows)
134     raptor_free_sequence(node->rows);
135 
136   RASQAL_FREE(rasqal_groupby_tree_node, node);
137 }
138 
139 
140 static int
rasqal_rowsource_groupby_tree_print_node(void * object,FILE * fh)141 rasqal_rowsource_groupby_tree_print_node(void *object, FILE *fh)
142 {
143   rasqal_groupby_tree_node* node = (rasqal_groupby_tree_node*)object;
144 
145   fputs("Group\n  Key Sequence of literals: ", fh);
146   if(node->literals)
147     /* sequence of literals */
148     raptor_sequence_print(node->literals, fh);
149   else
150     fputs("None", fh);
151 
152   fputs("\n  Value Sequence of rows:\n", fh);
153   if(node->rows) {
154     int i;
155     int size = raptor_sequence_size(node->rows);
156 
157     /* sequence of rows */
158     for(i = 0; i < size; i++) {
159       rasqal_row* row = (rasqal_row*)raptor_sequence_get_at(node->rows, i);
160 
161       fprintf(fh, "    Row %d: ", i);
162       rasqal_row_print(row, fh);
163       fputc('\n', fh);
164     }
165   } else
166     fputs("None\n", fh);
167 
168   return 0;
169 }
170 
171 
172 static int
rasqal_rowsource_groupby_literal_sequence_compare(const void * a,const void * b)173 rasqal_rowsource_groupby_literal_sequence_compare(const void *a, const void *b)
174 {
175   rasqal_groupby_rowsource_context* con;
176   rasqal_groupby_tree_node* node_a = (rasqal_groupby_tree_node*)a;
177   rasqal_groupby_tree_node* node_b = (rasqal_groupby_tree_node*)b;
178 
179   con = node_a->con;
180 
181   return rasqal_literal_sequence_compare(con->compare_flags,
182                                          node_a->literals, node_b->literals);
183 }
184 
185 
186 static int
rasqal_groupby_rowsource_init(rasqal_rowsource * rowsource,void * user_data)187 rasqal_groupby_rowsource_init(rasqal_rowsource* rowsource, void *user_data)
188 {
189   rasqal_groupby_rowsource_context* con;
190   con = (rasqal_groupby_rowsource_context*)user_data;
191 
192   con->group_id = -1;
193 
194   con->compare_flags = RASQAL_COMPARE_URI;
195 
196   con->offset = 0;
197   return 0;
198 }
199 
200 
201 static int
rasqal_groupby_rowsource_finish(rasqal_rowsource * rowsource,void * user_data)202 rasqal_groupby_rowsource_finish(rasqal_rowsource* rowsource, void *user_data)
203 {
204   rasqal_groupby_rowsource_context* con;
205   con = (rasqal_groupby_rowsource_context*)user_data;
206 
207   if(con->rowsource)
208     rasqal_free_rowsource(con->rowsource);
209 
210   if(con->exprs_seq)
211     raptor_free_sequence(con->exprs_seq);
212 
213   if(con->tree)
214     raptor_free_avltree(con->tree);
215 
216   if(con->group_iterator)
217     raptor_free_avltree_iterator(con->group_iterator);
218 
219   RASQAL_FREE(rasqal_groupby_rowsource_context, con);
220 
221   return 0;
222 }
223 
224 
225 static int
rasqal_groupby_rowsource_ensure_variables(rasqal_rowsource * rowsource,void * user_data)226 rasqal_groupby_rowsource_ensure_variables(rasqal_rowsource* rowsource,
227                                           void *user_data)
228 {
229   rasqal_groupby_rowsource_context* con;
230 
231   con = (rasqal_groupby_rowsource_context*)user_data;
232 
233   if(rasqal_rowsource_ensure_variables(con->rowsource))
234     return 1;
235 
236   rowsource->size = 0;
237   if(rasqal_rowsource_copy_variables(rowsource, con->rowsource))
238     return 1;
239 
240   return 0;
241 }
242 
243 
244 static int
rasqal_groupby_rowsource_process(rasqal_rowsource * rowsource,rasqal_groupby_rowsource_context * con)245 rasqal_groupby_rowsource_process(rasqal_rowsource* rowsource,
246                                  rasqal_groupby_rowsource_context* con)
247 {
248   /* already processed */
249   if(con->processed)
250     return 0;
251 
252   con->processed = 1;
253 
254   /* Empty expression list - no need to read rows */
255   if(!con->exprs_seq || !con->exprs_seq_size) {
256     con->group_id++;
257     return 0;
258   }
259 
260 
261   con->tree = raptor_new_avltree(rasqal_rowsource_groupby_literal_sequence_compare,
262                                  (raptor_data_free_handler)rasqal_free_groupby_tree_node,
263                                  /* flags */ 0);
264 
265   if(!con->tree)
266     return 1;
267 
268   raptor_avltree_set_print_handler(con->tree,
269                                    rasqal_rowsource_groupby_tree_print_node);
270 
271 
272   while(1) {
273     rasqal_row* row;
274 
275     row = rasqal_rowsource_read_row(con->rowsource);
276     if(!row)
277       break;
278 
279     rasqal_row_bind_variables(row, rowsource->query->vars_table);
280 
281     if(con->exprs_seq) {
282       raptor_sequence* literal_seq;
283       rasqal_groupby_tree_node key;
284       rasqal_groupby_tree_node* node;
285 
286       literal_seq = rasqal_expression_sequence_evaluate(rowsource->query,
287                                                         con->exprs_seq,
288                                                         /* ignore_errors */ 0,
289                                                         /* error_p */ NULL);
290 
291       if(!literal_seq) {
292         /* FIXME - what to do on errors? */
293         continue;
294       }
295 
296       memset(&key, '\0', sizeof(key));
297       key.con = con;
298       key.literals = literal_seq;
299 
300       node = (rasqal_groupby_tree_node*)raptor_avltree_search(con->tree, &key);
301       if(!node) {
302         /* New Group */
303         node = RASQAL_CALLOC(rasqal_groupby_tree_node*, 1, sizeof(*node));
304         if(!node) {
305           raptor_free_sequence(literal_seq);
306           return 1;
307         }
308 
309         node->con = con;
310         node->group_id = ++con->group_id;
311 
312         /* node now owns literal_seq */
313         node->literals = literal_seq;
314 
315         node->rows = raptor_new_sequence((raptor_data_free_handler)rasqal_free_row, (raptor_data_print_handler)rasqal_row_print);
316         if(!node->rows) {
317           rasqal_free_groupby_tree_node(node);
318           return 1;
319         }
320 
321         /* after this, node is owned by con->tree */
322         raptor_avltree_add(con->tree, node);
323       } else
324         raptor_free_sequence(literal_seq);
325 
326       row->group_id = node->group_id;
327 
328       /* after this, node owns the row */
329       raptor_sequence_push(node->rows, row);
330 
331     }
332   }
333 
334 #ifdef RASQAL_DEBUG
335   fputs("Grouping ", DEBUG_FH);
336   raptor_avltree_print(con->tree, DEBUG_FH);
337   fputs("\n", DEBUG_FH);
338 #endif
339 
340   if(raptor_avltree_size(con->tree))
341     con->group_iterator = raptor_new_avltree_iterator(con->tree,
342                                                       NULL, NULL,
343                                                       1);
344 
345   con->group_row_index = 0;
346 
347   con->offset = 0;
348 
349   return 0;
350 }
351 
352 
353 static rasqal_row*
rasqal_groupby_rowsource_read_row(rasqal_rowsource * rowsource,void * user_data)354 rasqal_groupby_rowsource_read_row(rasqal_rowsource* rowsource, void *user_data)
355 {
356   rasqal_groupby_rowsource_context* con;
357   rasqal_row *row = NULL;
358 
359   con = (rasqal_groupby_rowsource_context*)user_data;
360 
361   /* ensure we have stored grouped rows */
362   if(rasqal_groupby_rowsource_process(rowsource, con))
363     return NULL;
364 
365   if(con->tree && con->group_iterator) {
366     rasqal_groupby_tree_node* node = NULL;
367 
368     /* Rows were grouped so iterate through grouped rows */
369     while(1) {
370       node = (rasqal_groupby_tree_node*)raptor_avltree_iterator_get(con->group_iterator);
371       if(!node) {
372         /* No more nodes. finished last group and last row */
373         raptor_free_avltree_iterator(con->group_iterator);
374         con->group_iterator = NULL;
375 
376         raptor_free_avltree(con->tree);
377         con->tree = NULL;
378 
379         /* row = NULL is already set */
380         break;
381       }
382 
383       /* removes row from sequence and this code now owns the reference */
384       row = (rasqal_row*)raptor_sequence_delete_at(node->rows,
385                                                    con->group_row_index++);
386       if(row) {
387         /* Bind the values in the input row to the variables in the table */
388         rasqal_row_bind_variables(row, rowsource->query->vars_table);
389         break;
390       }
391 
392       /* End of sequence so reset row sequence index and advance iterator */
393       con->group_row_index = 0;
394 
395       if(raptor_avltree_iterator_next(con->group_iterator))
396         break;
397     }
398 
399     if(node && row)
400       row->group_id = node->group_id;
401 
402   } else if(con->tree && !con->group_iterator) {
403     /* we found inner rowsource with no rows - generate 1 row */
404     if(!con->offset) {
405       row = rasqal_new_row(rowsource);
406 
407       if(row)
408         row->group_id = 0;
409     }
410   } else {
411     /* no grouping: just pass rows through all in one group */
412     row = rasqal_rowsource_read_row(con->rowsource);
413 
414     if(row)
415       row->group_id = con->group_id;
416   }
417 
418   if(row)
419     row->offset = con->offset++;
420 
421   return row;
422 }
423 
424 
425 static int
rasqal_groupby_rowsource_reset(rasqal_rowsource * rowsource,void * user_data)426 rasqal_groupby_rowsource_reset(rasqal_rowsource* rowsource, void *user_data)
427 {
428   return 0;
429 }
430 
431 
432 static rasqal_rowsource*
rasqal_groupby_rowsource_get_inner_rowsource(rasqal_rowsource * rowsource,void * user_data,int offset)433 rasqal_groupby_rowsource_get_inner_rowsource(rasqal_rowsource* rowsource,
434                                              void *user_data, int offset)
435 {
436   rasqal_groupby_rowsource_context *con;
437   con = (rasqal_groupby_rowsource_context*)user_data;
438 
439   if(offset == 0)
440     return con->rowsource;
441 
442   return NULL;
443 }
444 
445 
446 static const rasqal_rowsource_handler rasqal_groupby_rowsource_handler = {
447   /* .version = */ 1,
448   "groupby",
449   /* .init = */ rasqal_groupby_rowsource_init,
450   /* .finish = */ rasqal_groupby_rowsource_finish,
451   /* .ensure_variables = */ rasqal_groupby_rowsource_ensure_variables,
452   /* .read_row = */ rasqal_groupby_rowsource_read_row,
453   /* .read_all_rows = */ NULL,
454   /* .reset = */ rasqal_groupby_rowsource_reset,
455   /* .set_requirements = */ NULL,
456   /* .get_inner_rowsource = */ rasqal_groupby_rowsource_get_inner_rowsource,
457   /* .set_origin = */ NULL,
458 };
459 
460 
461 /**
462  * rasqal_new_groupby_rowsource:
463  * @world: world object
464  * @query: query object
465  * @rowsource: input rowsource
466  * @exprs_seq: sequence of group by expressions
467  *
468  * INTERNAL - create a new group by rowsource
469  *
470  * the @rowsource becomes owned by the new rowsource
471  *
472  * Return value: new rowsource or NULL on failure
473  */
474 rasqal_rowsource*
rasqal_new_groupby_rowsource(rasqal_world * world,rasqal_query * query,rasqal_rowsource * rowsource,raptor_sequence * exprs_seq)475 rasqal_new_groupby_rowsource(rasqal_world *world,
476                              rasqal_query* query,
477                              rasqal_rowsource* rowsource,
478                              raptor_sequence* exprs_seq)
479 {
480   rasqal_groupby_rowsource_context* con;
481   int flags = 0;
482 
483   if(!world || !query)
484     return NULL;
485 
486   con = RASQAL_CALLOC(rasqal_groupby_rowsource_context*, 1, sizeof(*con));
487   if(!con)
488     return NULL;
489 
490   con->rowsource = rowsource;
491   con->exprs_seq_size = 0;
492 
493   if(exprs_seq) {
494     con->exprs_seq = rasqal_expression_copy_expression_sequence(exprs_seq);
495 
496     if(!con->exprs_seq)
497       goto fail;
498 
499     con->exprs_seq_size = raptor_sequence_size(exprs_seq);
500   }
501 
502   return rasqal_new_rowsource_from_handler(world, query,
503                                            con,
504                                            &rasqal_groupby_rowsource_handler,
505                                            query->vars_table,
506                                            flags);
507 
508   fail:
509 
510   if(rowsource)
511     rasqal_free_rowsource(rowsource);
512   if(exprs_seq)
513     raptor_free_sequence(exprs_seq);
514   if(con)
515     RASQAL_FREE(rasqal_groupby_rowsource_context*, con);
516 
517   return NULL;
518 }
519 
520 
521 #endif /* not STANDALONE */
522 
523 
524 
525 #ifdef STANDALONE
526 
527 /* one more prototype */
528 int main(int argc, char *argv[]);
529 
530 
531 /*
 * Test 1 and Test 2 use the solution sequence from the following example in the SPARQL 1.1 Query Draft (Test 2 groups it by ?x as shown)
533 
534 "
535 For example, given a
536    solution sequence S, ( {?x→2, ?y→3}, {?x→2, ?y→5}, {?x→6, ?y→7} ),
537 
538 Group((?x), S) = {
539   (2) → ( {?x→2, ?y→3}, {?x→2, ?y→5} ),
540   (6) → ( {?x→6, ?y→7} )
541 }
542 "
543 */
544 
545 
/* Number of entries in the test_data table */
#define GROUP_TESTS_COUNT 4

#define MAX_TEST_GROUPS 100
/* Capacity of the per-test list of GROUP BY variable names */
#define MAX_TEST_VARS 5

/* Test 0 input: variables x and y with no data rows */
static const char* const data_xy_no_rows[] =
{
  /* 2 variable names */
  "x", NULL, "y", NULL,
  /* end of data (0 rows) */
  NULL, NULL, NULL, NULL,
};
558 
/* Test 1 and Test 2 input: variables x and y with 3 data rows */
static const char* const data_xy_3_rows[] =
{
  /* 2 variable names */
  "x", NULL, "y", NULL,
  /* 3 rows of data */
  "2", NULL, "3", NULL,
  "2", NULL, "5", NULL,
  "6", NULL, "7", NULL,
  /* end of data */
  NULL, NULL, NULL, NULL,
};
573 
574 
/* Test 3 input: variables name, state and year with 100 data rows */
static const char* const data_us_senators_100_rows[] =
{
  /* 3 variable names */
  "name", NULL, "state", NULL, "year", NULL,
  /* 100 rows of data */
  "Al", NULL, "Minnesota", NULL, "1951", NULL,
  "Amy", NULL, "Minnesota", NULL, "1960", NULL,
  "Arlen", NULL, "Pennsylvania", NULL, "1930", NULL,
  "Barbara", NULL, "California", NULL, "1940", NULL,
  "Barbara", NULL, "Maryland", NULL, "1936", NULL,
  "Ben", NULL, "Maryland", NULL, "1943", NULL,
  "Ben", NULL, "Nebraska", NULL, "1941", NULL,
  "Bernie", NULL, "Vermont", NULL, "1941", NULL,
  "Bill", NULL, "Florida", NULL, "1942", NULL,
  "Blanche", NULL, "Arkansas", NULL, "1960", NULL,
  "Bob", NULL, "Utah", NULL, "1933", NULL,
  "Bob", NULL, "Pennsylvania", NULL, "1960", NULL,
  "Bob", NULL, "Tennessee", NULL, "1952", NULL,
  "Bob", NULL, "New Jersey", NULL, "1954", NULL,
  "Byron", NULL, "North Dakota", NULL, "1942", NULL,
  "Carl", NULL, "Michigan", NULL, "1934", NULL,
  "Carte", NULL, "West Virginia", NULL, "1974", NULL,
  "Christopher", NULL, "Connecticut", NULL, "1944", NULL,
  "Chuck", NULL, "Iowa", NULL, "1933", NULL,
  "Chuck", NULL, "New York", NULL, "1950", NULL,
  "Claire", NULL, "Missouri", NULL, "1953", NULL,
  "Daniel", NULL, "Hawaii", NULL, "1924", NULL,
  "Daniel", NULL, "Hawaii", NULL, "1924", NULL,
  "David", NULL, "Louisiana", NULL, "1961", NULL,
  "Debbie", NULL, "Michigan", NULL, "1950", NULL,
  "Dianne", NULL, "California", NULL, "1933", NULL,
  "Dick", NULL, "Illinois", NULL, "1944", NULL,
  "Evan", NULL, "Indiana", NULL, "1955", NULL,
  "Frank", NULL, "New Jersey", NULL, "1924", NULL,
  "George", NULL, "Florida", NULL, "1969", NULL,
  "George", NULL, "Ohio", NULL, "1936", NULL,
  "Harry", NULL, "Nevada", NULL, "1939", NULL,
  "Herb", NULL, "Wisconsin", NULL, "1935", NULL,
  "Jack", NULL, "Rhode Island", NULL, "1949", NULL,
  "Jay", NULL, "West Virginia", NULL, "1937", NULL,
  "Jeanne", NULL, "New Hampshire", NULL, "1947", NULL,
  "Jeff", NULL, "New Mexico", NULL, "1943", NULL,
  "Jeff", NULL, "Oregon", NULL, "1956", NULL,
  "Jeff", NULL, "Alabama", NULL, "1946", NULL,
  "Jim", NULL, "Kentucky", NULL, "1931", NULL,
  "Jim", NULL, "South Carolina", NULL, "1951", NULL,
  "Jim", NULL, "Oklahoma", NULL, "1934", NULL,
  "Jim", NULL, "Idaho", NULL, "1943", NULL,
  "Jim", NULL, "Virginia", NULL, "1946", NULL,
  "Joe", NULL, "Connecticut", NULL, "1942", NULL,
  "John", NULL, "Wyoming", NULL, "1952", NULL,
  "John", NULL, "Texas", NULL, "1952", NULL,
  "John", NULL, "Nevada", NULL, "1958", NULL,
  "John", NULL, "Massachusetts", NULL, "1943", NULL,
  "John", NULL, "Arizona", NULL, "1936", NULL,
  "John", NULL, "South Dakota", NULL, "1961", NULL,
  "Johnny", NULL, "Georgia", NULL, "1944", NULL,
  "Jon", NULL, "Arizona", NULL, "1942", NULL,
  "Jon", NULL, "Montana", NULL, "1956", NULL,
  "Judd", NULL, "New Hampshire", NULL, "1947", NULL,
  "Kay", NULL, "Texas", NULL, "1943", NULL,
  "Kay", NULL, "North Carolina", NULL, "1953", NULL,
  "Kent", NULL, "North Dakota", NULL, "1948", NULL,
  "Kirsten", NULL, "New York", NULL, "1966", NULL,
  "Kit", NULL, "Missouri", NULL, "1939", NULL,
  "Lamar", NULL, "Tennessee", NULL, "1940", NULL,
  "Lindsey", NULL, "South Carolina", NULL, "1955", NULL,
  "Lisa", NULL, "Alaska", NULL, "1957", NULL,
  "Maria", NULL, "Washington", NULL, "1958", NULL,
  "Mark", NULL, "Alaska", NULL, "1962", NULL,
  "Mark", NULL, "Arkansas", NULL, "1963", NULL,
  "Mark", NULL, "Colorado", NULL, "1950", NULL,
  "Mark", NULL, "Virginia", NULL, "1954", NULL,
  "Mary", NULL, "Louisiana", NULL, "1955", NULL,
  "Max", NULL, "Montana", NULL, "1941", NULL,
  "Michael", NULL, "Colorado", NULL, "1964", NULL,
  "Mike", NULL, "Idaho", NULL, "1951", NULL,
  "Mike", NULL, "Wyoming", NULL, "1944", NULL,
  "Mike", NULL, "Nebraska", NULL, "1950", NULL,
  "Mitch", NULL, "Kentucky", NULL, "1942", NULL,
  "Olympia", NULL, "Maine", NULL, "1947", NULL,
  "Orrin", NULL, "Utah", NULL, "1934", NULL,
  "Pat", NULL, "Kansas", NULL, "1936", NULL,
  "Patrick", NULL, "Vermont", NULL, "1940", NULL,
  "Patty", NULL, "Washington", NULL, "1950", NULL,
  "Richard", NULL, "North Carolina", NULL, "1955", NULL,
  "Richard", NULL, "Indiana", NULL, "1932", NULL,
  "Richard", NULL, "Alabama", NULL, "1934", NULL,
  "Roger", NULL, "Mississippi", NULL, "1951", NULL,
  "Roland", NULL, "Illinois", NULL, "1937", NULL,
  "Ron", NULL, "Oregon", NULL, "1949", NULL,
  "Russ", NULL, "Wisconsin", NULL, "1953", NULL,
  "Sam", NULL, "Kansas", NULL, "1956", NULL,
  "Saxby", NULL, "Georgia", NULL, "1943", NULL,
  "Scott", NULL, "Massachusetts", NULL, "1959", NULL,
  "Sheldon", NULL, "Rhode Island", NULL, "1955", NULL,
  "Sherrod", NULL, "Ohio", NULL, "1952", NULL,
  "Susan", NULL, "Maine", NULL, "1952", NULL,
  "Ted", NULL, "Delaware", NULL, "1939", NULL,
  "Thad", NULL, "Mississippi", NULL, "1937", NULL,
  "Tim", NULL, "South Dakota", NULL, "1946", NULL,
  "Tom", NULL, "Delaware", NULL, "1947", NULL,
  "Tom", NULL, "Oklahoma", NULL, "1948", NULL,
  "Tom", NULL, "Iowa", NULL, "1939", NULL,
  "Tom", NULL, "New Mexico", NULL, "1948", NULL,
  /* end of data */
  NULL, NULL, NULL, NULL, NULL, NULL,
};
684 
685 
/* Expected group ID of each output row, in output order */

/* Test 0: the single generated row */
static const int test0_groupids[] = { 0 };

/* Test 1: all 3 rows in the one group */
static const int test1_groupids[] = { 0, 0, 0 };

/* Test 2: first two rows in group 0, last row in group 1 */
static const int test2_groupids[] = { 0, 0, 1 };
701 
702 
/* Test 3: expected group ID of each of the 100 output rows.
 * Rows come out in Raptor AVL Tree order, which is sorted by the
 * expression list; 97 distinct IDs appear.
 */
static const int results_us_senators_97_groups[] = {
  21, 21, 27,  2, 38, 79, 10, 18, 24, 15,
  40, 74, 80, 31,  4, 29, 47, 75, 33, 82,
  92, 30, 57, 91, 96,  3, 58, 76,  6,  7,
  67,  8, 14, 43, 50, 72,  5, 35, 41, 46,
  53, 86, 17, 25, 49, 70, 37, 42, 93, 34,
  52, 73, 94, 55, 95, 95, 32, 83, 19, 23,
  64, 71, 77,  0, 39, 69, 81, 12, 44, 44,
  89, 90, 20, 54, 84, 13, 65, 26, 59, 66,
  78, 88, 36, 51, 85, 60, 45, 61, 87,  1,
   9, 11, 22, 48, 62, 63, 68, 56, 28, 16
};
706 
707 
708 
709 static const struct {
710   int vars;
711   int rows;
712   int ngroups;
713   const char* const *data;
714   const int *group_ids;
715   const char* const expr_vars[MAX_TEST_VARS];
716 } test_data[GROUP_TESTS_COUNT] = {
717   /* Test 0: No GROUP BY : 1 group expected with NULL values */
718   {2, 1, 1, data_xy_no_rows, test0_groupids, { "x", NULL } },
719 
720   /* Test 1: No GROUP BY : 1 group expected */
721   {2, 3, 1, data_xy_3_rows, test1_groupids, { NULL } },
722 
723   /* Test 2: GROUP BY ?x : 2 groups expected */
724   {2, 3, 2, data_xy_3_rows, test2_groupids, { "x", NULL } },
725 
726   /* Test 3: GROUP BY ?year, ?name : 97 groups expected */
727   {3, 100, 97, data_us_senators_100_rows, results_us_senators_97_groups, { "year", "name", NULL } },
728 
729 };
730 
731 
732 
733 int
main(int argc,char * argv[])734 main(int argc, char *argv[])
735 {
736   const char *program = rasqal_basename(argv[0]);
737   rasqal_rowsource *rowsource = NULL;
738   rasqal_world* world = NULL;
739   rasqal_query* query = NULL;
740   raptor_sequence* row_seq = NULL;
741   raptor_sequence* exprs_seq = NULL;
742   int failures = 0;
743   rasqal_variables_table* vt;
744   rasqal_rowsource *input_rs = NULL;
745   int vars_count;
746   raptor_sequence* vars_seq = NULL;
747   int test_id;
748 
749   world = rasqal_new_world();
750   if(!world || rasqal_world_open(world)) {
751     fprintf(stderr, "%s: rasqal_world init failed\n", program);
752     return(1);
753   }
754 
755   query = rasqal_new_query(world, "sparql", NULL);
756 
757   vt = query->vars_table;
758 
759   for(test_id = 0; test_id < GROUP_TESTS_COUNT; test_id++) {
760     int expected_rows_count = test_data[test_id].rows;
761     int expected_vars_count = test_data[test_id].vars;
762     const int* expected_group_ids = test_data[test_id].group_ids;
763     int expected_ngroups = test_data[test_id].ngroups;
764     raptor_sequence* seq = NULL;
765     int count;
766     int size;
767     int i;
768     int groups_counted;
769     int last_group_id;
770 
771     vars_count = expected_vars_count;
772     row_seq = rasqal_new_row_sequence(world, vt, test_data[test_id].data,
773                                       vars_count, &vars_seq);
774     if(row_seq) {
775       input_rs = rasqal_new_rowsequence_rowsource(world, query, vt,
776                                                   row_seq, vars_seq);
777       /* vars_seq and row_seq are now owned by input_rs */
778       vars_seq = row_seq = NULL;
779     }
780     if(!input_rs) {
781       fprintf(stderr, "%s: failed to create rowsequence rowsource\n", program);
782       failures++;
783       goto tidy;
784     }
785 
786 
787     exprs_seq = raptor_new_sequence((raptor_data_free_handler)rasqal_free_expression,
788                                    (raptor_data_print_handler)rasqal_expression_print);
789 
790     if(test_data[test_id].expr_vars[0] != NULL) {
791       int vindex;
792       const unsigned char* var_name;
793 
794       for(vindex = 0;
795           (var_name = RASQAL_GOOD_CAST(const unsigned char*, test_data[test_id].expr_vars[vindex] ));
796           vindex++) {
797         rasqal_variable* v;
798         rasqal_literal *l = NULL;
799         rasqal_expression* e = NULL;
800 
801         v = rasqal_variables_table_get_by_name(vt, RASQAL_VARIABLE_TYPE_NORMAL,
802                                                var_name);
803         /* returns SHARED pointer to variable */
804         if(v) {
805           v = rasqal_new_variable_from_variable(v);
806           l = rasqal_new_variable_literal(world, v);
807         }
808 
809         if(l)
810           e = rasqal_new_literal_expression(world, l);
811 
812         if(e)
813           raptor_sequence_push(exprs_seq, e);
814         else {
815           fprintf(stderr, "%s: failed to create variable %s\n", program,
816                   RASQAL_GOOD_CAST(const char*, var_name));
817           failures++;
818           goto tidy;
819         }
820 
821       }
822     }
823 
824     rowsource = rasqal_new_groupby_rowsource(world, query, input_rs, exprs_seq);
825     /* input_rs is now owned by rowsource */
826     input_rs = NULL;
827 
828     if(!rowsource) {
829       fprintf(stderr, "%s: failed to create groupby rowsource\n", program);
830       failures++;
831       goto tidy;
832     }
833 
834     seq = rasqal_rowsource_read_all_rows(rowsource);
835     if(!seq) {
836       fprintf(stderr,
837               "%s: test %d rasqal_rowsource_read_all_rows() returned a NULL seq for a groupby rowsource\n",
838               program, test_id);
839       failures++;
840       goto tidy;
841     }
842     count = raptor_sequence_size(seq);
843     if(count != expected_rows_count) {
844       fprintf(stderr,
845               "%s: test %d rasqal_rowsource_read_all_rows() returned %d rows for a groupby rowsource, expected %d\n",
846               program, test_id, count, expected_rows_count);
847       failures++;
848       goto tidy;
849     }
850 
851     size = rasqal_rowsource_get_size(rowsource);
852     if(size != expected_vars_count) {
853       fprintf(stderr,
854               "%s: test %d rasqal_rowsource_get_size() returned %d columns (variables) for a groupby rowsource, expected %d\n",
855               program, test_id, size, expected_vars_count);
856       failures++;
857       goto tidy;
858     }
859 
860     groups_counted = 0;
861     last_group_id = -1;
862     for(i = 0; i < count; i++) {
863       rasqal_row* row = (rasqal_row*)raptor_sequence_get_at(seq, i);
864 
865       if(row->group_id != last_group_id) {
866         groups_counted++;
867         last_group_id = row->group_id;
868       }
869 
870       if(row->group_id != expected_group_ids[i]) {
871         fprintf(stderr, "%s: test %d row #%d has group_id %d, expected %d\n",
872                 program, test_id, i, row->group_id, expected_group_ids[i]);
873         failures++;
874         goto tidy;
875       }
876 
877     }
878 
879     if(groups_counted != expected_ngroups) {
880         fprintf(stderr, "%s: test %d returnd %d groups, expected %d\n",
881                 program, test_id, groups_counted, expected_ngroups);
882         failures++;
883         goto tidy;
884       }
885 
886 #ifdef RASQAL_DEBUG
887     rasqal_rowsource_print_row_sequence(rowsource, seq, stderr);
888 #endif
889 
890     raptor_free_sequence(seq); seq = NULL;
891 
892     rasqal_free_rowsource(rowsource); rowsource = NULL;
893 
894     if(exprs_seq)
895       raptor_free_sequence(exprs_seq);
896     exprs_seq = NULL;
897   }
898 
899   tidy:
900   if(exprs_seq)
901     raptor_free_sequence(exprs_seq);
902   if(rowsource)
903     rasqal_free_rowsource(rowsource);
904   if(input_rs)
905     rasqal_free_rowsource(input_rs);
906   if(query)
907     rasqal_free_query(query);
908   if(world)
909     rasqal_free_world(world);
910 
911   return failures;
912 }
913 
914 #endif /* STANDALONE */
915