1 /* -*- Mode: c; c-basic-offset: 2 -*-
2 *
3 * rasqal_rowsource_aggregation.c - Rasqal aggregation rowsource class
4 *
5 * Handles SPARQL Aggregation() algebra including Distinct of
6 * expression arguments.
7 *
8 * Copyright (C) 2010, David Beckett http://www.dajobe.org/
9 *
10 * This package is Free Software and part of Redland http://librdf.org/
11 *
12 * It is licensed under the following three licenses as alternatives:
13 * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
14 * 2. GNU General Public License (GPL) V2 or any newer version
15 * 3. Apache License, V2.0 or any newer version
16 *
17 * You may not use this file except in compliance with at least one of
18 * the above three licenses.
19 *
20 * See LICENSE.html or LICENSE.txt at the top of this package for the
21 * complete terms and further detail along with the license texts for
22 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
23 *
24 */
25
26
27 #ifdef HAVE_CONFIG_H
28 #include <rasqal_config.h>
29 #endif
30
31 #ifdef WIN32
32 #include <win32_rasqal_config.h>
33 #endif
34
35 #include <stdio.h>
36 #include <string.h>
37 #ifdef HAVE_STDLIB_H
38 #include <stdlib.h>
39 #endif
40
41 #include <raptor.h>
42
43 #include "rasqal.h"
44 #include "rasqal_internal.h"
45
46
47 #define DEBUG_FH stderr
48
49
50 #ifndef STANDALONE
51
52
53 /*
54 * rasqal_agg_expr_data:
55 *
56 * INTERNAL - data for defining an agg expression input args / output var/values
57 *
58 * This is separate from #rasqal_builtin_agg_expression_execute which contains
59 * information only needed during execution.
60 */
61 typedef struct
62 {
63 /* agg expression */
64 rasqal_expression* expr;
65
66 /* aggregation function execution user data as created by
67 * rasqal_builtin_agg_expression_execute_init() and destroyed by
68 * rasqal_builtin_agg_expression_execute_finish().
69 */
70 void* agg_user_data;
71
72 /* (shared) output variable for this expression pointing into
73 * aggregation rowsource context vars_seq */
74 rasqal_variable* variable;
75
76 /* sequence of aggregate function arguments */
77 raptor_sequence* exprs_seq;
78
79 /* map for distincting literal values */
80 rasqal_map* map;
81 } rasqal_agg_expr_data;
82
83
84 /*
85 * rasqal_aggregation_rowsource_context:
86 *
87 * INTERNAL - Aggregration rowsource context
88 *
89 * Structure for handing aggregation over a grouped input rowsource
90 * created by rasqal_new_aggregation_rowsource().
91 *
92 */
93 typedef struct
94 {
95 /* inner (grouped) rowsource */
96 rasqal_rowsource *rowsource;
97
98 /* aggregate expressions */
99 raptor_sequence* exprs_seq;
100
101 /* output variables to bind (in order) */
102 raptor_sequence* vars_seq;
103
104 /* pointer to array of data per aggregate expression */
105 rasqal_agg_expr_data* expr_data;
106
107 /* number of agg expressions (size of exprs_seq, vars_seq, expr_data) */
108 int expr_count;
109
110 /* non-0 when done */
111 int finished;
112
113 /* last group ID seen */
114 int last_group_id;
115
116 /* saved row between group boundaries */
117 rasqal_row* saved_row;
118
119 /* output row offset */
120 int offset;
121
122 /* sequence of values from input rowsource to copy/sample through */
123 raptor_sequence* input_values;
124
125 /* number of variables/values on input rowsource to copy/sample through
126 * (size of @input_values) */
127 int input_values_count;
128
129 /* step into current group */
130 int step_count;
131 } rasqal_aggregation_rowsource_context;
132
133
134 /*
135 * rasqal_builtin_agg_expression_execute:
136 *
137 * INTERNAL - state for built-in execution of certain aggregate expressions
138 *
139 * Executes AVG, COUNT, GROUP_CONCAT, MAX, MIN, SAMPLE
140 *
141 */
142 typedef struct
143 {
144 rasqal_world* world;
145
146 /* expression being executed */
147 rasqal_expression* expr;
148
149 /* literal for computation (e.g. current MAX, MIN seen) */
150 rasqal_literal* l;
151
152 /* number of steps executed - used for AVG in calculating result */
153 int count;
154
155 /* error happened */
156 int error;
157
158 /* separator for GROUP_CONCAT */
159 unsigned char separator[2];
160
161 /* string buffer for GROUP_CONCAT */
162 raptor_stringbuffer *sb;
163 } rasqal_builtin_agg_expression_execute;
164
165
166 static void rasqal_builtin_agg_expression_execute_finish(void* user_data);
167
168
169 static void*
rasqal_builtin_agg_expression_execute_init(rasqal_world * world,rasqal_expression * expr)170 rasqal_builtin_agg_expression_execute_init(rasqal_world *world,
171 rasqal_expression* expr)
172 {
173 rasqal_builtin_agg_expression_execute* b;
174
175 b = RASQAL_CALLOC(rasqal_builtin_agg_expression_execute*, 1, sizeof(*b));
176 if(!b)
177 return NULL;
178
179 b->expr = expr;
180 b->world = world;
181 b->l = NULL;
182 b->count = 0;
183 b->error = 0;
184
185 if(expr->op == RASQAL_EXPR_GROUP_CONCAT) {
186 b->sb = raptor_new_stringbuffer();
187 if(!b->sb) {
188 rasqal_builtin_agg_expression_execute_finish(b);
189 return NULL;
190 }
191
192 b->separator[0] = (unsigned char)' ';
193 b->separator[1] = (unsigned char)'\0';
194 }
195
196 return b;
197 }
198
199
200 static void
rasqal_builtin_agg_expression_execute_finish(void * user_data)201 rasqal_builtin_agg_expression_execute_finish(void* user_data)
202 {
203 rasqal_builtin_agg_expression_execute* b;
204
205 b = (rasqal_builtin_agg_expression_execute*)user_data;
206
207 if(b->l)
208 rasqal_free_literal(b->l);
209
210 if(b->sb)
211 raptor_free_stringbuffer(b->sb);
212
213 RASQAL_FREE(rasqal_builtin_agg_expression_execute, b);
214 }
215
216
217 static int
rasqal_builtin_agg_expression_execute_reset(void * user_data)218 rasqal_builtin_agg_expression_execute_reset(void* user_data)
219 {
220 rasqal_builtin_agg_expression_execute* b;
221
222 b = (rasqal_builtin_agg_expression_execute*)user_data;
223
224 b->count = 0;
225 b->error = 0;
226
227 if(b->l) {
228 rasqal_free_literal(b->l);
229 b->l = 0;
230 }
231
232 if(b->sb) {
233 raptor_free_stringbuffer(b->sb);
234 b->sb = raptor_new_stringbuffer();
235 if(!b->sb)
236 return 1;
237 }
238
239 return 0;
240 }
241
242
243 static int
rasqal_builtin_agg_expression_execute_step(void * user_data,raptor_sequence * literals)244 rasqal_builtin_agg_expression_execute_step(void* user_data,
245 raptor_sequence* literals)
246 {
247 rasqal_builtin_agg_expression_execute* b;
248 rasqal_literal* l;
249 int i;
250
251 b = (rasqal_builtin_agg_expression_execute*)user_data;
252
253 if(b->error)
254 return b->error;
255
256 if(b->expr->op == RASQAL_EXPR_COUNT) {
257 /* COUNT(*) : counts every row (does not care about literals) */
258 if(b->expr->arg1->op == RASQAL_EXPR_VARSTAR)
259 b->count++;
260 /* COUNT(expr list) : counts rows with non-empty sequence of literals */
261 else if(raptor_sequence_size(literals) > 0)
262 b->count++;
263
264 return 0;
265 }
266
267
268 /* Other aggregate functions count every row */
269 b->count++;
270
271 for(i = 0; (l = (rasqal_literal*)raptor_sequence_get_at(literals, i)); i++) {
272 rasqal_literal* result = NULL;
273
274 if(b->expr->op == RASQAL_EXPR_SAMPLE) {
275 /* Sample chooses the first literal it sees */
276 if(!b->l)
277 b->l = rasqal_new_literal_from_literal(l);
278
279 break;
280 }
281
282 if(b->expr->op == RASQAL_EXPR_GROUP_CONCAT) {
283 const unsigned char* str;
284 int error = 0;
285
286 str = RASQAL_GOOD_CAST(const unsigned char*, rasqal_literal_as_string_flags(l, 0, &error));
287
288 if(!error) {
289 if(raptor_stringbuffer_length(b->sb))
290 raptor_stringbuffer_append_counted_string(b->sb, b->separator, 1, 1);
291
292 raptor_stringbuffer_append_string(b->sb, str, 1);
293 }
294 continue;
295 }
296
297
298 if(!b->l)
299 result = rasqal_new_literal_from_literal(l);
300 else {
301 if(b->expr->op == RASQAL_EXPR_SUM || b->expr->op == RASQAL_EXPR_AVG) {
302 result = rasqal_literal_add(b->l, l, &b->error);
303 } else if(b->expr->op == RASQAL_EXPR_MIN) {
304 int cmp = rasqal_literal_compare(b->l, l, 0, &b->error);
305 if(cmp <= 0)
306 result = rasqal_new_literal_from_literal(b->l);
307 else
308 result = rasqal_new_literal_from_literal(l);
309 } else if(b->expr->op == RASQAL_EXPR_MAX) {
310 int cmp = rasqal_literal_compare(b->l, l, 0, &b->error);
311 if(cmp >= 0)
312 result = rasqal_new_literal_from_literal(b->l);
313 else
314 result = rasqal_new_literal_from_literal(l);
315 } else {
316 RASQAL_FATAL2("Builtin aggregation operation %u is not implemented",
317 b->expr->op);
318 }
319
320 rasqal_free_literal(b->l);
321
322 if(!result)
323 b->error = 1;
324 }
325
326 b->l = result;
327
328 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
329 RASQAL_DEBUG3("Aggregation step result %s (error=%d)\n",
330 (result ? RASQAL_GOOD_CAST(const char*, rasqal_literal_as_string(result)) : "(NULL)"),
331 b->error);
332 #endif
333 }
334
335 return b->error;
336 }
337
338
339 static rasqal_literal*
rasqal_builtin_agg_expression_execute_result(void * user_data)340 rasqal_builtin_agg_expression_execute_result(void* user_data)
341 {
342 rasqal_builtin_agg_expression_execute* b;
343
344 b = (rasqal_builtin_agg_expression_execute*)user_data;
345
346 if(b->error)
347 return NULL;
348
349 if(b->expr->op == RASQAL_EXPR_COUNT) {
350 rasqal_literal* result;
351
352 result = rasqal_new_integer_literal(b->world, RASQAL_LITERAL_INTEGER,
353 b->count);
354 return result;
355 }
356
357 if(b->expr->op == RASQAL_EXPR_GROUP_CONCAT) {
358 size_t len;
359 unsigned char* str;
360 rasqal_literal* result;
361
362 len = raptor_stringbuffer_length(b->sb);
363 str = RASQAL_MALLOC(unsigned char*, len + 1);
364 if(!str)
365 return NULL;
366
367 if(raptor_stringbuffer_copy_to_string(b->sb, str, len)) {
368 RASQAL_FREE(char*, str);
369 return NULL;
370 }
371
372 result = rasqal_new_string_literal(b->world, str, NULL, NULL, NULL);
373
374 return result;
375 }
376
377
378 if(b->expr->op == RASQAL_EXPR_AVG) {
379 rasqal_literal* count_l = NULL;
380 rasqal_literal* result = NULL;
381
382 if(b->count)
383 count_l = rasqal_new_integer_literal(b->world, RASQAL_LITERAL_INTEGER,
384 b->count);
385
386 if(b->l && count_l)
387 result = rasqal_literal_divide(b->l, count_l, &b->error);
388 else
389 /* No total to divide */
390 b->error = 1;
391 if(count_l)
392 rasqal_free_literal(count_l);
393
394 if(b->error) {
395 /* result will be NULL and error will be non-0 on division by 0
396 * in which case the result is literal(integer 0)
397 */
398 result = rasqal_new_integer_literal(b->world, RASQAL_LITERAL_INTEGER,
399 0);
400 }
401
402 return result;
403 }
404
405 return rasqal_new_literal_from_literal(b->l);
406 }
407
408
409
410 static int
rasqal_aggregation_rowsource_init(rasqal_rowsource * rowsource,void * user_data)411 rasqal_aggregation_rowsource_init(rasqal_rowsource* rowsource, void *user_data)
412 {
413 rasqal_aggregation_rowsource_context* con;
414
415 con = (rasqal_aggregation_rowsource_context*)user_data;
416
417 con->input_values = raptor_new_sequence((raptor_data_free_handler)rasqal_free_literal,
418 (raptor_data_print_handler)rasqal_literal_print);
419
420
421 con->last_group_id = -1;
422 con->offset = 0;
423 con->step_count = 0;
424
425 if(rasqal_rowsource_request_grouping(con->rowsource))
426 return 1;
427
428 return 0;
429 }
430
431
432 static int
rasqal_aggregation_rowsource_finish(rasqal_rowsource * rowsource,void * user_data)433 rasqal_aggregation_rowsource_finish(rasqal_rowsource* rowsource,
434 void *user_data)
435 {
436 rasqal_aggregation_rowsource_context* con;
437
438 con = (rasqal_aggregation_rowsource_context*)user_data;
439
440 if(con->expr_data) {
441 int i;
442
443 for(i = 0; i < con->expr_count; i++) {
444 rasqal_agg_expr_data* expr_data = &con->expr_data[i];
445
446 if(expr_data->agg_user_data)
447 rasqal_builtin_agg_expression_execute_finish(expr_data->agg_user_data);
448
449 if(expr_data->exprs_seq)
450 raptor_free_sequence(expr_data->exprs_seq);
451
452 if(expr_data->expr)
453 rasqal_free_expression(expr_data->expr);
454
455 if(expr_data->map)
456 rasqal_free_map(expr_data->map);
457 }
458
459 RASQAL_FREE(rasqal_agg_expr_data, con->expr_data);
460 }
461
462 if(con->exprs_seq)
463 raptor_free_sequence(con->exprs_seq);
464
465 if(con->vars_seq)
466 raptor_free_sequence(con->vars_seq);
467
468 if(con->rowsource)
469 rasqal_free_rowsource(con->rowsource);
470
471 if(con->saved_row)
472 rasqal_free_row(con->saved_row);
473
474 if(con->input_values)
475 raptor_free_sequence(con->input_values);
476
477 RASQAL_FREE(rasqal_aggregation_rowsource_context, con);
478
479 return 0;
480 }
481
482
483 static int
rasqal_aggregation_rowsource_ensure_variables(rasqal_rowsource * rowsource,void * user_data)484 rasqal_aggregation_rowsource_ensure_variables(rasqal_rowsource* rowsource,
485 void *user_data)
486 {
487 rasqal_aggregation_rowsource_context* con;
488 int offset;
489 int i;
490
491 con = (rasqal_aggregation_rowsource_context*)user_data;
492
493 if(rasqal_rowsource_ensure_variables(con->rowsource))
494 return 1;
495
496 rowsource->size = 0;
497
498 if(rasqal_rowsource_copy_variables(rowsource, con->rowsource))
499 return 1;
500
501 con->input_values_count = rowsource->size;
502
503 for(i = 0; i < con->expr_count; i++) {
504 rasqal_agg_expr_data* expr_data = &con->expr_data[i];
505
506 offset = rasqal_rowsource_add_variable(rowsource, expr_data->variable);
507 if(offset < 0)
508 return 1;
509 }
510
511 return 0;
512 }
513
514
515 static rasqal_row*
rasqal_aggregation_rowsource_read_row(rasqal_rowsource * rowsource,void * user_data)516 rasqal_aggregation_rowsource_read_row(rasqal_rowsource* rowsource,
517 void *user_data)
518 {
519 rasqal_aggregation_rowsource_context* con;
520 rasqal_row* row;
521 int error = 0;
522
523 con = (rasqal_aggregation_rowsource_context*)user_data;
524
525 if(con->finished)
526 return NULL;
527
528
529 /* Iterate over input rows until last row seen or group done */
530 while(1) {
531 error = 0;
532
533 if(con->saved_row)
534 row = con->saved_row;
535 else
536 row = rasqal_rowsource_read_row(con->rowsource);
537
538 if(!row) {
539 /* End of input - calculate last aggregation result */
540 con->finished = 1;
541 break;
542 }
543
544
545 if(con->last_group_id != row->group_id) {
546 int i;
547
548 if(!con->saved_row && con->last_group_id >= 0) {
549 /* Existing aggregation is done - return result */
550
551 /* save current row for next time this function is called */
552 con->saved_row = row;
553
554 row = NULL;
555 #ifdef RASQAL_DEBUG
556 RASQAL_DEBUG2("Aggregation ending group %d", con->last_group_id);
557 fputc('\n', DEBUG_FH);
558 #endif
559
560 /* Empty distinct maps */
561 for(i = 0; i < con->expr_count; i++) {
562 rasqal_agg_expr_data* expr_data = &con->expr_data[i];
563
564 if(expr_data->map) {
565 rasqal_free_map(expr_data->map);
566 expr_data->map = NULL;
567 }
568 }
569
570 break;
571 }
572
573 /* reference is now in 'row' variable */
574 con->saved_row = NULL;
575
576 #ifdef RASQAL_DEBUG
577 RASQAL_DEBUG2("Aggregation starting group %d", row->group_id);
578 fputc('\n', DEBUG_FH);
579 #endif
580
581
582 /* next time this function is called we continue here */
583
584 for(i = 0; i < con->expr_count; i++) {
585 rasqal_agg_expr_data* expr_data = &con->expr_data[i];
586
587 if(!expr_data->agg_user_data) {
588 /* init once */
589 expr_data->agg_user_data = rasqal_builtin_agg_expression_execute_init(rowsource->world,
590 expr_data->expr);
591
592 if(!expr_data->agg_user_data) {
593 error = 1;
594 break;
595 }
596 }
597
598 /* Init map for each group */
599 if(expr_data->expr->flags & RASQAL_EXPR_FLAG_DISTINCT) {
600 expr_data->map = rasqal_new_literal_sequence_sort_map(1 /* is_distinct */,
601 0 /* compare_flags */);
602 if(!expr_data->map) {
603 error = 1;
604 break;
605 }
606 }
607 }
608
609 if(error)
610 break;
611
612 con->last_group_id = row->group_id;
613 } /* end if handling change of group ID */
614
615
616 /* Bind the values in the input row to the variables in the table */
617 rasqal_row_bind_variables(row, rowsource->query->vars_table);
618
619 /* Evaluate the expressions giving a sequence of literals to
620 * run the aggregation step over.
621 */
622 if(1) {
623 int i;
624
625 if(!con->step_count) {
626 /* copy first value row from input rowsource */
627 for(i = 0; i < con->input_values_count; i++) {
628 rasqal_literal* value;
629
630 value = rasqal_new_literal_from_literal(row->values[i]);
631 raptor_sequence_set_at(con->input_values, i, value);
632 }
633 }
634
635 con->step_count++;
636
637 for(i = 0; i < con->expr_count; i++) {
638 rasqal_agg_expr_data* expr_data = &con->expr_data[i];
639 raptor_sequence* seq;
640
641 /* SPARQL Aggregation uses ListEvalE() to evaluate - ignoring
642 * errors and filtering out expressions that fail
643 */
644 seq = rasqal_expression_sequence_evaluate(rowsource->query,
645 expr_data->exprs_seq,
646 /* ignore_errors */ 1,
647 &error);
648 if(error)
649 continue;
650
651 if(expr_data->map) {
652 if(rasqal_literal_sequence_sort_map_add_literal_sequence(expr_data->map,
653 seq)) {
654 /* duplicate found
655 *
656 * The above function just freed seq so no data is lost
657 */
658 continue;
659 }
660 }
661
662 #ifdef RASQAL_DEBUG
663 RASQAL_DEBUG2("Aggregation expr %d step over literals: ", i);
664 raptor_sequence_print(seq, DEBUG_FH);
665 fputc('\n', DEBUG_FH);
666 #endif
667
668 error = rasqal_builtin_agg_expression_execute_step(expr_data->agg_user_data,
669 seq);
670 /* when DISTINCTing, seq remains owned by the map
671 * otherwise seq is local and must be freed
672 */
673 if(!expr_data->map)
674 raptor_free_sequence(seq);
675
676 if(error) {
677 RASQAL_DEBUG2("Aggregation expr %d returned error\n", i);
678 error = 0;
679 }
680 }
681 }
682
683 rasqal_free_row(row); row = NULL;
684
685 if(error)
686 break;
687
688 } /* end while reading rows */
689
690
691 if(error) {
692 /* Discard row on error */
693 if(row) {
694 rasqal_free_row(row);
695 row = NULL;
696 }
697 } else if (con->last_group_id >= 0) {
698 int offset = 0;
699 int i;
700
701 /* Generate result row and reset for next group */
702 row = rasqal_new_row(rowsource);
703
704 /* Copy scalar results through */
705 for(i = 0; i < con->input_values_count; i++) {
706 rasqal_literal* result;
707
708 /* Reset: get and delete any stored input rowsource literal */
709 result = (rasqal_literal*)raptor_sequence_delete_at(con->input_values, i);
710
711 rasqal_row_set_value_at(row, offset, result);
712 rasqal_free_literal(result);
713
714 offset++;
715 }
716
717
718 /* Set aggregate results */
719 for(i = 0; i < con->expr_count; i++) {
720 rasqal_literal* result;
721 rasqal_agg_expr_data* expr_data = &con->expr_data[i];
722 rasqal_variable* v;
723
724 /* Calculate the result because the input ended or a new group started */
725 result = rasqal_builtin_agg_expression_execute_result(expr_data->agg_user_data);
726
727 #ifdef RASQAL_DEBUG
728 RASQAL_DEBUG2("Aggregation %d ending group with result: ", i);
729 rasqal_literal_print(result, DEBUG_FH);
730 fputc('\n', DEBUG_FH);
731 #endif
732
733 v = rasqal_rowsource_get_variable_by_offset(rowsource, offset);
734 result = rasqal_new_literal_from_literal(result);
735 /* it is OK to bind to NULL */
736 rasqal_variable_set_value(v, result);
737
738 rasqal_row_set_value_at(row, offset, result);
739
740 if(result)
741 rasqal_free_literal(result);
742
743 offset++;
744
745 if(rasqal_builtin_agg_expression_execute_reset(expr_data->agg_user_data)) {
746 rasqal_free_row(row);
747 row = NULL;
748 break;
749 }
750 }
751
752 con->step_count = 0;
753
754 if(row)
755 row->offset = con->offset++;
756 }
757
758
759 return row;
760 }
761
762
763 static rasqal_rowsource*
rasqal_aggregation_rowsource_get_inner_rowsource(rasqal_rowsource * rowsource,void * user_data,int offset)764 rasqal_aggregation_rowsource_get_inner_rowsource(rasqal_rowsource* rowsource,
765 void *user_data, int offset)
766 {
767 rasqal_aggregation_rowsource_context *con;
768 con = (rasqal_aggregation_rowsource_context*)user_data;
769
770 if(offset == 0)
771 return con->rowsource;
772
773 return NULL;
774 }
775
776
777 static const rasqal_rowsource_handler rasqal_aggregation_rowsource_handler = {
778 /* .version = */ 1,
779 "aggregation",
780 /* .init = */ rasqal_aggregation_rowsource_init,
781 /* .finish = */ rasqal_aggregation_rowsource_finish,
782 /* .ensure_variables = */ rasqal_aggregation_rowsource_ensure_variables,
783 /* .read_row = */ rasqal_aggregation_rowsource_read_row,
784 /* .read_all_rows = */ NULL,
785 /* .reset = */ NULL,
786 /* .set_requirements = */ NULL,
787 /* .get_inner_rowsource = */ rasqal_aggregation_rowsource_get_inner_rowsource,
788 /* .set_origin = */ NULL,
789 };
790
791
792 /**
793 * rasqal_new_aggregation_rowsource:
794 * @world: world
795 * @query: query
796 * @rowsource: input (grouped) rowsource - typically constructed by rasqal_new_groupby_rowsource()
797 * @exprs_seq: sequence of #rasqal_expression
798 * @vars_seq: sequence of #rasqal_variable to bind in output rows
799 *
800 * INTERNAL - Create a new rowsource for a aggregration
801 *
802 * The @rowsource becomes owned by the new rowsource. The @exprs_seq
803 * and @vars_seq are not.
804 *
805 * For example with the SPARQL 1.1 example queries
806 *
807 * SELECT (MAX(?y) AS ?agg) WHERE { ?x ?y ?z } GROUP BY ?x
808 * the aggregation part corresponds to
809 * exprs_seq : [ expr MAX with sequence of expression args [?y] }
810 * vars_seq : [ {internal variable name} ]
811 *
812 * SELECT (ex:agg(?y, ?z) AS ?agg) WHERE { ?x ?y ?z } GROUP BY ?x
813 * the aggregation part corresponds to
814 * exprs_seq : [ expr ex:agg with sequence of expression args [?y, ?z] ]
815 * vars_seq : [ {internal variable name} ]
816 *
817 * SELECT ?x, (MIN(?z) AS ?agg) WHERE { ?x ?y ?z } GROUP BY ?x
818 * the aggregation part corresponds to
819 * exprs_seq : [ non-aggregate expression ?x,
820 * expr MIN with sequence of expression args [?z] ]
821 * vars_seq : [ ?x, {internal variable name} ]
822 *
823 * Return value: new rowsource or NULL on failure
824 */
825
826 rasqal_rowsource*
rasqal_new_aggregation_rowsource(rasqal_world * world,rasqal_query * query,rasqal_rowsource * rowsource,raptor_sequence * exprs_seq,raptor_sequence * vars_seq)827 rasqal_new_aggregation_rowsource(rasqal_world *world, rasqal_query* query,
828 rasqal_rowsource* rowsource,
829 raptor_sequence* exprs_seq,
830 raptor_sequence* vars_seq)
831 {
832 rasqal_aggregation_rowsource_context* con = NULL;
833 int flags = 0;
834 int size;
835 int i;
836
837 if(!world || !query || !rowsource || !exprs_seq || !vars_seq)
838 goto fail;
839
840 exprs_seq = rasqal_expression_copy_expression_sequence(exprs_seq);
841 vars_seq = rasqal_variable_copy_variable_sequence(vars_seq);
842
843 size = raptor_sequence_size(exprs_seq);
844 if(size != raptor_sequence_size(vars_seq)) {
845 RASQAL_DEBUG3("expressions sequence size %d does not match vars sequence size %d\n", size, raptor_sequence_size(vars_seq));
846 goto fail;
847 }
848
849
850 con = RASQAL_CALLOC(rasqal_aggregation_rowsource_context*, 1, sizeof(*con));
851 if(!con)
852 goto fail;
853
854 con->rowsource = rowsource;
855
856 con->exprs_seq = exprs_seq;
857 con->vars_seq = vars_seq;
858
859 /* allocate per-expr data */
860 con->expr_count = size;
861 con->expr_data = RASQAL_CALLOC(rasqal_agg_expr_data*, RASQAL_GOOD_CAST(size_t, size),
862 sizeof(rasqal_agg_expr_data));
863 if(!con->expr_data)
864 goto fail;
865
866 /* Initialise per-expr data */
867 for(i = 0; i < size; i++) {
868 rasqal_expression* expr = (rasqal_expression *)raptor_sequence_get_at(exprs_seq, i);
869 rasqal_variable* variable = (rasqal_variable*)raptor_sequence_get_at(vars_seq, i);
870 rasqal_agg_expr_data* expr_data = &con->expr_data[i];
871
872 expr_data->expr = rasqal_new_expression_from_expression(expr);
873 expr_data->variable = variable;
874
875 /* Prepare expression arguments sequence in per-expr data */
876 if(expr->args) {
877 /* list of #rasqal_expression arguments already in expr
878 * #RASQAL_EXPR_FUNCTION and #RASQAL_EXPR_GROUP_CONCAT
879 */
880 expr_data->exprs_seq = rasqal_expression_copy_expression_sequence(expr->args);
881 } else {
882 /* single argument */
883
884 expr_data->exprs_seq = raptor_new_sequence((raptor_data_free_handler)rasqal_free_expression,
885 (raptor_data_print_handler)rasqal_expression_print);
886 raptor_sequence_push(expr_data->exprs_seq,
887 rasqal_new_expression_from_expression(expr->arg1));
888 }
889 }
890
891
892 return rasqal_new_rowsource_from_handler(world, query,
893 con,
894 &rasqal_aggregation_rowsource_handler,
895 query->vars_table,
896 flags);
897
898 fail:
899
900 if(rowsource)
901 rasqal_free_rowsource(rowsource);
902 if(exprs_seq)
903 raptor_free_sequence(exprs_seq);
904 if(vars_seq)
905 raptor_free_sequence(vars_seq);
906 if(con)
907 RASQAL_FREE(rasqal_aggregation_rowsource_context*, con);
908
909 return NULL;
910 }
911
912 #endif /* not STANDALONE */
913
914
915
916 #ifdef STANDALONE
917
/* prototype for the test program entry point */
int main(int argc, char *argv[]);


/* number of entries in the test_data table */
#define AGGREGATION_TESTS_COUNT 6


/* size of the per-test array of aggregate argument variable names */
#define MAX_TEST_VARS 3
926
/* Input data shared by all tests: variables x, y, z and 3 rows of values.
 * Each name or value is followed by a NULL and a row of NULLs ends the data.
 */
static const char* const data_xyz_3_rows[] =
{
  /* variable names */
  "x", NULL,  "y", NULL,  "z", NULL,
  /* row 1 */
  "1", NULL,  "2", NULL,  "3", NULL,
  /* row 2 */
  "1", NULL,  "3", NULL,  "4", NULL,
  /* row 3 */
  "2", NULL,  "5", NULL,  "6", NULL,
  /* terminator */
  NULL, NULL, NULL, NULL, NULL, NULL,
};
941
/* Expected results, one value per output row (one row per group) */

/* test 0: MAX(?y) GROUP BY ?x */
static const int test0_output_rows[] = { 3, 5, };

/* test 1: MIN(?x) GROUP BY ?x */
static const int test1_output_rows[] = { 1, 2, };

/* test 2: SUM(?z) GROUP BY ?x */
static const int test2_output_rows[] = { 7, 6, };

/* test 3: AVG(?x) GROUP BY ?x */
static const double test3_output_rows[] = { 1.0, 2.0, };

/* test 4: SAMPLE(?y) GROUP BY ?x */
static const int test4_output_rows[] = { 2, 5, };

/* test 5: GROUP_CONCAT(?z) GROUP BY ?x */
static const char* const test5_output_rows[] = { "3 4", "6", };
960
961
/* Group ID given to each input row, in row order (used by every test) */
static const int test0_groupids[] = { 0, 0, 1 };
967
968 static const struct {
969 int input_vars;
970 int input_rows;
971 int input_ngroups;
972 int output_vars;
973 int output_rows;
974 const char* const *data;
975 const int *group_ids;
976 rasqal_literal_type result_type;
977 const int *result_int_data;
978 const double *result_double_data;
979 const char* const *result_string_data;
980 rasqal_op op;
981 const char* const expr_agg_vars[MAX_TEST_VARS];
982 } test_data[AGGREGATION_TESTS_COUNT] = {
983 /*
984 * Execute the aggregation part of SELECT (MAX(?y) AS ?fake) ... GROUP BY ?x
985 * Input 3 vars (x, y, z), 3 rows and 2 groups.
986 * Output is 1 var (fake), 2 rows (1 per input group)
987 * Expected result: [ ?fake => 3, ?fake => 5]
988 */
989 {3, 3, 2, 1, 2, data_xyz_3_rows, test0_groupids,
990 RASQAL_LITERAL_INTEGER, test0_output_rows, NULL,
991 NULL,
992 RASQAL_EXPR_MAX, { "y" } },
993
994 /*
995 * Execute the aggregation part of SELECT (MIN(?x) AS ?fake) ... GROUP BY ?x
996 * Expected result: [ ?fake => 1, ?fake => 2]
997 */
998 {3, 3, 2, 1, 2, data_xyz_3_rows, test0_groupids,
999 RASQAL_LITERAL_INTEGER, test1_output_rows, NULL,
1000 NULL,
1001 RASQAL_EXPR_MIN, { "x" } },
1002
1003 /*
1004 * Execute the aggregation part of SELECT (SUM(?z) AS ?fake) ... GROUP BY ?x
1005 * Expected result: [ ?fake => 7, ?fake => 6]
1006 */
1007 {3, 3, 2, 1, 2, data_xyz_3_rows, test0_groupids,
1008 RASQAL_LITERAL_INTEGER, test2_output_rows, NULL,
1009 NULL,
1010 RASQAL_EXPR_SUM, { "z" } },
1011
1012 /*
1013 * Execute the aggregation part of SELECT (AVG(?x) AS ?fake) ... GROUP BY ?x
1014 * Expected result: [ ?fake => 1.0, ?fake => 2.0]
1015 */
1016 {3, 3, 2, 1, 2, data_xyz_3_rows, test0_groupids,
1017 RASQAL_LITERAL_DECIMAL, NULL, test3_output_rows,
1018 NULL,
1019 RASQAL_EXPR_AVG, { "x" } },
1020
1021 /*
1022 * Execute the aggregation part of SELECT (SAMPLE(?y) AS ?fake) ... GROUP BY ?x
1023 * Expected result: [ ?fake => 2, ?fake => 5]
1024 */
1025 {3, 3, 2, 1, 2, data_xyz_3_rows, test0_groupids,
1026 RASQAL_LITERAL_INTEGER, test4_output_rows, NULL,
1027 NULL,
1028 RASQAL_EXPR_SAMPLE, { "y" } },
1029
1030 /*
1031 * Execute the aggregation part of SELECT (GROUP_CONCAT(?z) AS ?fake) ... GROUP BY ?x
1032 * Expected result: [ ?fake => "3 4", ?fake => "6"]
1033 */
1034 {3, 3, 2, 1, 2, data_xyz_3_rows, test0_groupids,
1035 RASQAL_LITERAL_INTEGER, NULL, NULL,
1036 test5_output_rows,
1037 RASQAL_EXPR_GROUP_CONCAT, { "z" } }
1038 };
1039
1040
1041 static rasqal_expression*
make_test_expr(rasqal_world * world,raptor_sequence * expr_vars_seq,rasqal_op op)1042 make_test_expr(rasqal_world* world,
1043 raptor_sequence* expr_vars_seq,
1044 rasqal_op op)
1045 {
1046 if(op == RASQAL_EXPR_MAX ||
1047 op == RASQAL_EXPR_MIN ||
1048 op == RASQAL_EXPR_SUM ||
1049 op == RASQAL_EXPR_AVG ||
1050 op == RASQAL_EXPR_SAMPLE) {
1051 rasqal_expression* arg1;
1052
1053 arg1 = (rasqal_expression*)raptor_sequence_delete_at(expr_vars_seq, 0);
1054 raptor_free_sequence(expr_vars_seq);
1055
1056 return rasqal_new_aggregate_function_expression(world, op,
1057 arg1,
1058 /* params */ NULL,
1059 /* flags */ 0);
1060 }
1061
1062 if(op == RASQAL_EXPR_GROUP_CONCAT) {
1063 return rasqal_new_group_concat_expression(world,
1064 /* flags */ 0,
1065 expr_vars_seq,
1066 /* separator */ NULL);
1067 }
1068
1069 return NULL;
1070 }
1071
1072
1073 int
main(int argc,char * argv[])1074 main(int argc, char *argv[])
1075 {
1076 const char *program = rasqal_basename(argv[0]);
1077 rasqal_rowsource *rowsource = NULL;
1078 rasqal_world* world = NULL;
1079 rasqal_query* query = NULL;
1080 raptor_sequence* row_seq = NULL;
1081 raptor_sequence* expr_args_seq = NULL;
1082 int failures = 0;
1083 rasqal_variables_table* vt;
1084 rasqal_rowsource *input_rs = NULL;
1085 raptor_sequence* vars_seq = NULL;
1086 raptor_sequence* exprs_seq = NULL;
1087 int test_id;
1088
1089 world = rasqal_new_world();
1090 if(!world || rasqal_world_open(world)) {
1091 fprintf(stderr, "%s: rasqal_world init failed\n", program);
1092 return(1);
1093 }
1094
1095 query = rasqal_new_query(world, "sparql", NULL);
1096
1097 vt = query->vars_table;
1098
1099 for(test_id = 0; test_id < AGGREGATION_TESTS_COUNT; test_id++) {
1100 int input_vars_count = test_data[test_id].input_vars;
1101 int output_rows_count = test_data[test_id].output_rows;
1102 int output_vars_count = test_data[test_id].output_vars;
1103 const int* input_group_ids = test_data[test_id].group_ids;
1104 rasqal_literal_type expected_type = test_data[test_id].result_type;
1105 const int* result_int_data = test_data[test_id].result_int_data;
1106 const double* result_double_data = test_data[test_id].result_double_data;
1107 const char* const* result_string_data = test_data[test_id].result_string_data;
1108 rasqal_op op = test_data[test_id].op;
1109 raptor_sequence* seq = NULL;
1110 int count;
1111 int size;
1112 int i;
1113 #define OUT_VAR_NAME_LEN 4
1114 const char* output_var_name = "fake";
1115 rasqal_variable* output_var;
1116 rasqal_expression* expr;
1117 int output_row_size = (input_vars_count + output_vars_count);
1118
1119 if(output_vars_count != 1) {
1120 fprintf(stderr,
1121 "%s: test %d expects %d variables which is not supported. Test skipped\n",
1122 program, test_id, output_vars_count);
1123 failures++;
1124 goto tidy;
1125 }
1126
1127 row_seq = rasqal_new_row_sequence(world, vt, test_data[test_id].data,
1128 test_data[test_id].input_vars, &vars_seq);
1129 if(row_seq) {
1130 for(i = 0; i < test_data[test_id].input_rows; i++) {
1131 rasqal_row* row = (rasqal_row*)raptor_sequence_get_at(row_seq, i);
1132 row->group_id = input_group_ids[i];
1133 }
1134
1135 input_rs = rasqal_new_rowsequence_rowsource(world, query, vt,
1136 row_seq, vars_seq);
1137 /* vars_seq and row_seq are now owned by input_rs */
1138 vars_seq = row_seq = NULL;
1139 }
1140 if(!input_rs) {
1141 fprintf(stderr, "%s: failed to create rowsequence rowsource\n", program);
1142 failures++;
1143 goto tidy;
1144 }
1145
1146 expr_args_seq = raptor_new_sequence((raptor_data_free_handler)rasqal_free_expression,
1147 (raptor_data_print_handler)rasqal_expression_print);
1148
1149 if(test_data[test_id].expr_agg_vars[0] != NULL) {
1150 int vindex;
1151 const unsigned char* var_name;
1152
1153 for(vindex = 0;
1154 (var_name = RASQAL_GOOD_CAST(const unsigned char*, test_data[test_id].expr_agg_vars[vindex] ));
1155 vindex++) {
1156 rasqal_variable* v;
1157 rasqal_literal *l = NULL;
1158 rasqal_expression* e = NULL;
1159
1160 v = rasqal_variables_table_get_by_name(vt, RASQAL_VARIABLE_TYPE_NORMAL,
1161 var_name);
1162 /* returns SHARED pointer to variable */
1163 if(v) {
1164 v = rasqal_new_variable_from_variable(v);
1165 l = rasqal_new_variable_literal(world, v);
1166 }
1167
1168 if(l)
1169 e = rasqal_new_literal_expression(world, l);
1170
1171 if(e)
1172 raptor_sequence_push(expr_args_seq, e);
1173 else {
1174 fprintf(stderr, "%s: failed to create variable %s\n", program,
1175 RASQAL_GOOD_CAST(const char*, var_name));
1176 failures++;
1177 goto tidy;
1178 }
1179
1180 }
1181 } /* if vars */
1182
1183
1184 output_var = rasqal_variables_table_add2(vt, RASQAL_VARIABLE_TYPE_ANONYMOUS,
1185 RASQAL_GOOD_CAST(const unsigned char*, output_var_name),
1186 OUT_VAR_NAME_LEN, NULL);
1187 expr = make_test_expr(world, expr_args_seq, op);
1188 /* expr_args_seq is now owned by expr */
1189 expr_args_seq = NULL;
1190
1191 exprs_seq = raptor_new_sequence((raptor_data_free_handler)rasqal_free_expression,
1192 (raptor_data_print_handler)rasqal_expression_print);
1193 raptor_sequence_push(exprs_seq, expr);
1194 /* expr is now owned by exprs_seq */
1195 expr = NULL;
1196
1197 vars_seq = raptor_new_sequence((raptor_data_free_handler)rasqal_free_variable,
1198 (raptor_data_print_handler)rasqal_variable_print);
1199 raptor_sequence_push(vars_seq, output_var);
1200 /* output_var is now owned by vars_seq */
1201 output_var = NULL;
1202
1203 rowsource = rasqal_new_aggregation_rowsource(world, query, input_rs,
1204 exprs_seq, vars_seq);
1205 /* input_rs is now owned by rowsource */
1206 input_rs = NULL;
1207 /* these are no longer needed; agg rowsource made copies */
1208 raptor_free_sequence(exprs_seq); exprs_seq = NULL;
1209 raptor_free_sequence(vars_seq); vars_seq = NULL;
1210
1211 if(!rowsource) {
1212 fprintf(stderr, "%s: failed to create aggregation rowsource\n", program);
1213 failures++;
1214 goto tidy;
1215 }
1216
1217
1218 /* Test the rowsource */
1219 seq = rasqal_rowsource_read_all_rows(rowsource);
1220 if(!seq) {
1221 fprintf(stderr,
1222 "%s: test %d rasqal_rowsource_read_all_rows() returned a NULL seq for a aggregation rowsource\n",
1223 program, test_id);
1224 failures++;
1225 goto tidy;
1226 }
1227 count = raptor_sequence_size(seq);
1228 if(count != output_rows_count) {
1229 fprintf(stderr,
1230 "%s: test %d rasqal_rowsource_read_all_rows() returned %d rows for a aggregation rowsource, expected %d\n",
1231 program, test_id, count, output_rows_count);
1232 failures++;
1233 goto tidy;
1234 }
1235
1236 size = rasqal_rowsource_get_size(rowsource);
1237 if(size != output_row_size) {
1238 fprintf(stderr,
1239 "%s: test %d rasqal_rowsource_get_size() returned %d columns (variables) for a aggregation rowsource, expected %d\n",
1240 program, test_id, size, output_row_size);
1241 failures++;
1242 goto tidy;
1243 }
1244
1245 if(result_int_data || result_double_data) {
1246 for(i = 0; i < output_rows_count; i++) {
1247 rasqal_row* row = (rasqal_row*)raptor_sequence_get_at(seq, i);
1248 rasqal_literal* value;
1249 int vc;
1250
1251 if(row->size != output_row_size) {
1252 fprintf(stderr,
1253 "%s: test %d row #%d is size %d expected %d\n",
1254 program, test_id, i, row->size, output_row_size);
1255 failures++;
1256 goto tidy;
1257 }
1258
1259 /* Expected variable ordering in output row is:
1260 * {input vars} {output_vars}
1261 */
1262 for(vc = 0; vc < output_vars_count; vc++) {
1263 rasqal_variable* row_var;
1264 int offset = input_vars_count + vc;
1265
1266 row_var = rasqal_rowsource_get_variable_by_offset(rowsource, offset);
1267 value = row->values[offset];
1268
1269 if(!value) {
1270 fprintf(stderr,
1271 "%s: test %d row #%d %s value #%d result is NULL\n",
1272 program, test_id, i, row_var->name, vc);
1273 failures++;
1274 goto tidy;
1275 }
1276
1277 if(value->type != expected_type) {
1278 fprintf(stderr,
1279 "%s: test %d row #%d %s value #%d result is type %s expected %s\n",
1280 program, test_id, i, row_var->name, vc,
1281 rasqal_literal_type_label(value->type),
1282 rasqal_literal_type_label(expected_type));
1283 failures++;
1284 goto tidy;
1285 }
1286
1287 if(expected_type == RASQAL_LITERAL_INTEGER) {
1288 int expected_integer = result_int_data[i];
1289 int integer;
1290
1291 integer = rasqal_literal_as_integer(value, NULL);
1292
1293 if(integer != expected_integer) {
1294 fprintf(stderr,
1295 "%s: test %d row #%d %s value #%d result is %d expected %d\n",
1296 program, test_id, i, row_var->name, vc,
1297 integer, expected_integer);
1298 failures++;
1299 goto tidy;
1300 }
1301 } else if(expected_type == RASQAL_LITERAL_DECIMAL) {
1302 double expected_double = result_double_data[i];
1303 double d;
1304
1305 d = rasqal_literal_as_double(value, NULL);
1306
1307 if(!rasqal_double_approximately_equal(d, expected_double)) {
1308 fprintf(stderr,
1309 "%s: test %d row #%d %s value #%d result is %f expected %f\n",
1310 program, test_id, i, row_var->name, vc,
1311 d, expected_double);
1312 failures++;
1313 goto tidy;
1314 }
1315 }
1316
1317 }
1318
1319 }
1320 }
1321
1322 if(result_string_data) {
1323 for(i = 0; i < output_rows_count; i++) {
1324 rasqal_row* row = (rasqal_row*)raptor_sequence_get_at(seq, i);
1325 rasqal_literal* value;
1326 const unsigned char* str;
1327 const char* expected_string = result_string_data[i];
1328 int vc;
1329
1330 if(row->size != output_row_size) {
1331 fprintf(stderr,
1332 "%s: test %d row #%d is size %d expected %d\n",
1333 program, test_id, i, row->size, output_row_size);
1334 failures++;
1335 goto tidy;
1336 }
1337
1338 /* Expected variable ordering in output row is:
1339 * {input vars} {output_vars}
1340 */
1341 for(vc = 0; vc < output_vars_count; vc++) {
1342 rasqal_variable* row_var;
1343 int offset = input_vars_count + vc;
1344
1345 row_var = rasqal_rowsource_get_variable_by_offset(rowsource, offset);
1346 value = row->values[offset];
1347
1348 if(!value) {
1349 fprintf(stderr,
1350 "%s: test %d row #%d %s value #%d result is NULL\n",
1351 program, test_id, i, row_var->name, vc);
1352 failures++;
1353 goto tidy;
1354 }
1355
1356 if(value->type != RASQAL_LITERAL_STRING) {
1357 fprintf(stderr,
1358 "%s: test %d row #%d %s value #%d is type %s expected integer\n",
1359 program, test_id, i, row_var->name, vc,
1360 rasqal_literal_type_label(value->type));
1361 failures++;
1362 goto tidy;
1363 }
1364
1365 str = rasqal_literal_as_string(value);
1366
1367 if(strcmp(RASQAL_GOOD_CAST(const char*, str), expected_string)) {
1368 fprintf(stderr,
1369 "%s: test %d row #%d %s value #%d is %s expected %s\n",
1370 program, test_id, i, row_var->name, vc,
1371 str, expected_string);
1372 failures++;
1373 goto tidy;
1374 }
1375 }
1376
1377 }
1378 }
1379
1380
1381 #ifdef RASQAL_DEBUG
1382 rasqal_rowsource_print_row_sequence(rowsource, seq, stderr);
1383 #endif
1384
1385 raptor_free_sequence(seq); seq = NULL;
1386
1387 rasqal_free_rowsource(rowsource); rowsource = NULL;
1388
1389 if(expr_args_seq)
1390 raptor_free_sequence(expr_args_seq);
1391 expr_args_seq = NULL;
1392 }
1393
1394
1395 tidy:
1396 if(exprs_seq)
1397 raptor_free_sequence(exprs_seq);
1398 if(vars_seq)
1399 raptor_free_sequence(vars_seq);
1400 if(expr_args_seq)
1401 raptor_free_sequence(expr_args_seq);
1402 if(rowsource)
1403 rasqal_free_rowsource(rowsource);
1404 if(input_rs)
1405 rasqal_free_rowsource(input_rs);
1406 if(query)
1407 rasqal_free_query(query);
1408 if(world)
1409 rasqal_free_world(world);
1410
1411 return failures;
1412 }
1413
1414 #endif /* STANDALONE */
1415