1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * rasqal_rowsource_aggregation.c - Rasqal aggregation rowsource class
4  *
5  * Handles SPARQL Aggregation() algebra including Distinct of
6  * expression arguments.
7  *
8  * Copyright (C) 2010, David Beckett http://www.dajobe.org/
9  *
10  * This package is Free Software and part of Redland http://librdf.org/
11  *
12  * It is licensed under the following three licenses as alternatives:
13  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
14  *   2. GNU General Public License (GPL) V2 or any newer version
15  *   3. Apache License, V2.0 or any newer version
16  *
17  * You may not use this file except in compliance with at least one of
18  * the above three licenses.
19  *
20  * See LICENSE.html or LICENSE.txt at the top of this package for the
21  * complete terms and further detail along with the license texts for
22  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
23  *
24  */
25 
26 
27 #ifdef HAVE_CONFIG_H
28 #include <rasqal_config.h>
29 #endif
30 
31 #ifdef WIN32
32 #include <win32_rasqal_config.h>
33 #endif
34 
35 #include <stdio.h>
36 #include <string.h>
37 #ifdef HAVE_STDLIB_H
38 #include <stdlib.h>
39 #endif
40 
41 #include <raptor.h>
42 
43 #include "rasqal.h"
44 #include "rasqal_internal.h"
45 
46 
47 #define DEBUG_FH stderr
48 
49 
50 #ifndef STANDALONE
51 
52 
53 /*
54  * rasqal_agg_expr_data:
55  *
56  * INTERNAL - data for defining an agg expression input args / output var/values
57  *
58  * This is separate from #rasqal_builtin_agg_expression_execute which contains
59  * information only needed during execution.
60  */
61 typedef struct
62 {
63   /* agg expression */
64   rasqal_expression* expr;
65 
66   /* aggregation function execution user data as created by
67    * rasqal_builtin_agg_expression_execute_init() and destroyed by
68    * rasqal_builtin_agg_expression_execute_finish().
69    */
70   void* agg_user_data;
71 
72   /* (shared) output variable for this expression pointing into
73    * aggregation rowsource context vars_seq */
74   rasqal_variable* variable;
75 
76   /* sequence of aggregate function arguments */
77   raptor_sequence* exprs_seq;
78 
79   /* map for distincting literal values */
80   rasqal_map* map;
81 } rasqal_agg_expr_data;
82 
83 
84 /*
85  * rasqal_aggregation_rowsource_context:
86  *
87  * INTERNAL - Aggregration rowsource context
88  *
89  * Structure for handing aggregation over a grouped input rowsource
90  * created by rasqal_new_aggregation_rowsource().
91  *
92  */
93 typedef struct
94 {
95   /* inner (grouped) rowsource */
96   rasqal_rowsource *rowsource;
97 
98   /* aggregate expressions */
99   raptor_sequence* exprs_seq;
100 
101   /* output variables to bind (in order) */
102   raptor_sequence* vars_seq;
103 
104   /* pointer to array of data per aggregate expression */
105   rasqal_agg_expr_data* expr_data;
106 
107   /* number of agg expressions (size of exprs_seq, vars_seq, expr_data) */
108   int expr_count;
109 
110   /* non-0 when done */
111   int finished;
112 
113   /* last group ID seen */
114   int last_group_id;
115 
116   /* saved row between group boundaries */
117   rasqal_row* saved_row;
118 
119   /* output row offset */
120   int offset;
121 
122   /* sequence of values from input rowsource to copy/sample through */
123   raptor_sequence* input_values;
124 
125   /* number of variables/values on input rowsource to copy/sample through
126    * (size of @input_values) */
127   int input_values_count;
128 
129   /* step into current group */
130   int step_count;
131 } rasqal_aggregation_rowsource_context;
132 
133 
134 /*
135  * rasqal_builtin_agg_expression_execute:
136  *
137  * INTERNAL - state for built-in execution of certain aggregate expressions
138  *
139  * Executes AVG, COUNT, GROUP_CONCAT, MAX, MIN, SAMPLE
140  *
141  */
142 typedef struct
143 {
144   rasqal_world* world;
145 
146   /* expression being executed */
147   rasqal_expression* expr;
148 
149   /* literal for computation (e.g. current MAX, MIN seen) */
150   rasqal_literal* l;
151 
152   /* number of steps executed - used for AVG in calculating result */
153   int count;
154 
155   /* error happened */
156   int error;
157 
158   /* separator for GROUP_CONCAT */
159   unsigned char separator[2];
160 
161   /* string buffer for GROUP_CONCAT */
162   raptor_stringbuffer *sb;
163 } rasqal_builtin_agg_expression_execute;
164 
165 
166 static void rasqal_builtin_agg_expression_execute_finish(void* user_data);
167 
168 
169 static void*
rasqal_builtin_agg_expression_execute_init(rasqal_world * world,rasqal_expression * expr)170 rasqal_builtin_agg_expression_execute_init(rasqal_world *world,
171                                            rasqal_expression* expr)
172 {
173   rasqal_builtin_agg_expression_execute* b;
174 
175   b = RASQAL_CALLOC(rasqal_builtin_agg_expression_execute*, 1, sizeof(*b));
176   if(!b)
177     return NULL;
178 
179   b->expr = expr;
180   b->world = world;
181   b->l = NULL;
182   b->count = 0;
183   b->error = 0;
184 
185   if(expr->op == RASQAL_EXPR_GROUP_CONCAT) {
186     b->sb = raptor_new_stringbuffer();
187     if(!b->sb) {
188       rasqal_builtin_agg_expression_execute_finish(b);
189       return NULL;
190     }
191 
192     b->separator[0] = (unsigned char)' ';
193     b->separator[1] = (unsigned char)'\0';
194   }
195 
196   return b;
197 }
198 
199 
200 static void
rasqal_builtin_agg_expression_execute_finish(void * user_data)201 rasqal_builtin_agg_expression_execute_finish(void* user_data)
202 {
203   rasqal_builtin_agg_expression_execute* b;
204 
205   b = (rasqal_builtin_agg_expression_execute*)user_data;
206 
207   if(b->l)
208     rasqal_free_literal(b->l);
209 
210   if(b->sb)
211     raptor_free_stringbuffer(b->sb);
212 
213   RASQAL_FREE(rasqal_builtin_agg_expression_execute, b);
214 }
215 
216 
217 static int
rasqal_builtin_agg_expression_execute_reset(void * user_data)218 rasqal_builtin_agg_expression_execute_reset(void* user_data)
219 {
220   rasqal_builtin_agg_expression_execute* b;
221 
222   b = (rasqal_builtin_agg_expression_execute*)user_data;
223 
224   b->count = 0;
225   b->error = 0;
226 
227   if(b->l) {
228     rasqal_free_literal(b->l);
229     b->l = 0;
230   }
231 
232   if(b->sb) {
233     raptor_free_stringbuffer(b->sb);
234     b->sb = raptor_new_stringbuffer();
235     if(!b->sb)
236       return 1;
237   }
238 
239   return 0;
240 }
241 
242 
243 static int
rasqal_builtin_agg_expression_execute_step(void * user_data,raptor_sequence * literals)244 rasqal_builtin_agg_expression_execute_step(void* user_data,
245                                            raptor_sequence* literals)
246 {
247   rasqal_builtin_agg_expression_execute* b;
248   rasqal_literal* l;
249   int i;
250 
251   b = (rasqal_builtin_agg_expression_execute*)user_data;
252 
253   if(b->error)
254     return b->error;
255 
256   if(b->expr->op == RASQAL_EXPR_COUNT) {
257     /* COUNT(*) : counts every row (does not care about literals) */
258     if(b->expr->arg1->op == RASQAL_EXPR_VARSTAR)
259       b->count++;
260     /* COUNT(expr list) : counts rows with non-empty sequence of literals */
261     else if(raptor_sequence_size(literals) > 0)
262       b->count++;
263 
264     return 0;
265   }
266 
267 
268   /* Other aggregate functions count every row */
269   b->count++;
270 
271   for(i = 0; (l = (rasqal_literal*)raptor_sequence_get_at(literals, i)); i++) {
272     rasqal_literal* result = NULL;
273 
274     if(b->expr->op == RASQAL_EXPR_SAMPLE) {
275       /* Sample chooses the first literal it sees */
276       if(!b->l)
277         b->l = rasqal_new_literal_from_literal(l);
278 
279       break;
280     }
281 
282     if(b->expr->op == RASQAL_EXPR_GROUP_CONCAT) {
283       const unsigned char* str;
284       int error = 0;
285 
286       str = RASQAL_GOOD_CAST(const unsigned char*, rasqal_literal_as_string_flags(l, 0, &error));
287 
288       if(!error) {
289         if(raptor_stringbuffer_length(b->sb))
290           raptor_stringbuffer_append_counted_string(b->sb, b->separator, 1, 1);
291 
292         raptor_stringbuffer_append_string(b->sb, str, 1);
293       }
294       continue;
295     }
296 
297 
298     if(!b->l)
299       result = rasqal_new_literal_from_literal(l);
300     else {
301       if(b->expr->op == RASQAL_EXPR_SUM || b->expr->op == RASQAL_EXPR_AVG) {
302         result = rasqal_literal_add(b->l, l, &b->error);
303       } else if(b->expr->op == RASQAL_EXPR_MIN) {
304         int cmp = rasqal_literal_compare(b->l, l, 0, &b->error);
305         if(cmp <= 0)
306           result = rasqal_new_literal_from_literal(b->l);
307         else
308           result = rasqal_new_literal_from_literal(l);
309       } else if(b->expr->op == RASQAL_EXPR_MAX) {
310         int cmp = rasqal_literal_compare(b->l, l, 0, &b->error);
311         if(cmp >= 0)
312           result = rasqal_new_literal_from_literal(b->l);
313         else
314           result = rasqal_new_literal_from_literal(l);
315       } else {
316         RASQAL_FATAL2("Builtin aggregation operation %u is not implemented",
317                       b->expr->op);
318       }
319 
320       rasqal_free_literal(b->l);
321 
322       if(!result)
323         b->error = 1;
324     }
325 
326     b->l = result;
327 
328 #if defined(RASQAL_DEBUG) && RASQAL_DEBUG > 1
329     RASQAL_DEBUG3("Aggregation step result %s (error=%d)\n",
330                   (result ? RASQAL_GOOD_CAST(const char*, rasqal_literal_as_string(result)) : "(NULL)"),
331                   b->error);
332 #endif
333   }
334 
335   return b->error;
336 }
337 
338 
339 static rasqal_literal*
rasqal_builtin_agg_expression_execute_result(void * user_data)340 rasqal_builtin_agg_expression_execute_result(void* user_data)
341 {
342   rasqal_builtin_agg_expression_execute* b;
343 
344   b = (rasqal_builtin_agg_expression_execute*)user_data;
345 
346   if(b->error)
347     return NULL;
348 
349   if(b->expr->op == RASQAL_EXPR_COUNT) {
350     rasqal_literal* result;
351 
352     result = rasqal_new_integer_literal(b->world, RASQAL_LITERAL_INTEGER,
353                                         b->count);
354     return result;
355   }
356 
357   if(b->expr->op == RASQAL_EXPR_GROUP_CONCAT) {
358     size_t len;
359     unsigned char* str;
360     rasqal_literal* result;
361 
362     len = raptor_stringbuffer_length(b->sb);
363     str = RASQAL_MALLOC(unsigned char*, len + 1);
364     if(!str)
365       return NULL;
366 
367     if(raptor_stringbuffer_copy_to_string(b->sb, str, len)) {
368       RASQAL_FREE(char*, str);
369       return NULL;
370     }
371 
372     result = rasqal_new_string_literal(b->world, str, NULL, NULL, NULL);
373 
374     return result;
375   }
376 
377 
378   if(b->expr->op == RASQAL_EXPR_AVG) {
379     rasqal_literal* count_l = NULL;
380     rasqal_literal* result = NULL;
381 
382     if(b->count)
383       count_l = rasqal_new_integer_literal(b->world, RASQAL_LITERAL_INTEGER,
384                                            b->count);
385 
386     if(b->l && count_l)
387       result = rasqal_literal_divide(b->l, count_l, &b->error);
388     else
389       /* No total to divide */
390       b->error = 1;
391     if(count_l)
392       rasqal_free_literal(count_l);
393 
394     if(b->error) {
395       /* result will be NULL and error will be non-0 on division by 0
396        * in which case the result is literal(integer 0)
397        */
398       result = rasqal_new_integer_literal(b->world, RASQAL_LITERAL_INTEGER,
399                                           0);
400     }
401 
402     return result;
403   }
404 
405   return rasqal_new_literal_from_literal(b->l);
406 }
407 
408 
409 
410 static int
rasqal_aggregation_rowsource_init(rasqal_rowsource * rowsource,void * user_data)411 rasqal_aggregation_rowsource_init(rasqal_rowsource* rowsource, void *user_data)
412 {
413   rasqal_aggregation_rowsource_context* con;
414 
415   con = (rasqal_aggregation_rowsource_context*)user_data;
416 
417   con->input_values = raptor_new_sequence((raptor_data_free_handler)rasqal_free_literal,
418                                           (raptor_data_print_handler)rasqal_literal_print);
419 
420 
421   con->last_group_id = -1;
422   con->offset = 0;
423   con->step_count = 0;
424 
425   if(rasqal_rowsource_request_grouping(con->rowsource))
426     return 1;
427 
428   return 0;
429 }
430 
431 
432 static int
rasqal_aggregation_rowsource_finish(rasqal_rowsource * rowsource,void * user_data)433 rasqal_aggregation_rowsource_finish(rasqal_rowsource* rowsource,
434                                     void *user_data)
435 {
436   rasqal_aggregation_rowsource_context* con;
437 
438   con = (rasqal_aggregation_rowsource_context*)user_data;
439 
440   if(con->expr_data) {
441     int i;
442 
443     for(i = 0; i < con->expr_count; i++) {
444       rasqal_agg_expr_data* expr_data = &con->expr_data[i];
445 
446       if(expr_data->agg_user_data)
447         rasqal_builtin_agg_expression_execute_finish(expr_data->agg_user_data);
448 
449       if(expr_data->exprs_seq)
450         raptor_free_sequence(expr_data->exprs_seq);
451 
452       if(expr_data->expr)
453         rasqal_free_expression(expr_data->expr);
454 
455       if(expr_data->map)
456         rasqal_free_map(expr_data->map);
457     }
458 
459     RASQAL_FREE(rasqal_agg_expr_data, con->expr_data);
460   }
461 
462   if(con->exprs_seq)
463     raptor_free_sequence(con->exprs_seq);
464 
465   if(con->vars_seq)
466     raptor_free_sequence(con->vars_seq);
467 
468   if(con->rowsource)
469     rasqal_free_rowsource(con->rowsource);
470 
471   if(con->saved_row)
472     rasqal_free_row(con->saved_row);
473 
474   if(con->input_values)
475     raptor_free_sequence(con->input_values);
476 
477   RASQAL_FREE(rasqal_aggregation_rowsource_context, con);
478 
479   return 0;
480 }
481 
482 
483 static int
rasqal_aggregation_rowsource_ensure_variables(rasqal_rowsource * rowsource,void * user_data)484 rasqal_aggregation_rowsource_ensure_variables(rasqal_rowsource* rowsource,
485                                               void *user_data)
486 {
487   rasqal_aggregation_rowsource_context* con;
488   int offset;
489   int i;
490 
491   con = (rasqal_aggregation_rowsource_context*)user_data;
492 
493   if(rasqal_rowsource_ensure_variables(con->rowsource))
494     return 1;
495 
496   rowsource->size = 0;
497 
498   if(rasqal_rowsource_copy_variables(rowsource, con->rowsource))
499     return 1;
500 
501   con->input_values_count = rowsource->size;
502 
503   for(i = 0; i < con->expr_count; i++) {
504     rasqal_agg_expr_data* expr_data = &con->expr_data[i];
505 
506     offset = rasqal_rowsource_add_variable(rowsource, expr_data->variable);
507     if(offset < 0)
508       return 1;
509   }
510 
511   return 0;
512 }
513 
514 
515 static rasqal_row*
rasqal_aggregation_rowsource_read_row(rasqal_rowsource * rowsource,void * user_data)516 rasqal_aggregation_rowsource_read_row(rasqal_rowsource* rowsource,
517                                       void *user_data)
518 {
519   rasqal_aggregation_rowsource_context* con;
520   rasqal_row* row;
521   int error = 0;
522 
523   con = (rasqal_aggregation_rowsource_context*)user_data;
524 
525   if(con->finished)
526     return NULL;
527 
528 
529   /* Iterate over input rows until last row seen or group done */
530   while(1) {
531     error = 0;
532 
533     if(con->saved_row)
534       row = con->saved_row;
535     else
536       row = rasqal_rowsource_read_row(con->rowsource);
537 
538     if(!row) {
539       /* End of input - calculate last aggregation result */
540       con->finished = 1;
541       break;
542     }
543 
544 
545     if(con->last_group_id != row->group_id) {
546       int i;
547 
548       if(!con->saved_row && con->last_group_id >= 0) {
549         /* Existing aggregation is done - return result */
550 
551         /* save current row for next time this function is called */
552         con->saved_row = row;
553 
554         row = NULL;
555 #ifdef RASQAL_DEBUG
556         RASQAL_DEBUG2("Aggregation ending group %d", con->last_group_id);
557         fputc('\n', DEBUG_FH);
558 #endif
559 
560         /* Empty distinct maps */
561         for(i = 0; i < con->expr_count; i++) {
562           rasqal_agg_expr_data* expr_data = &con->expr_data[i];
563 
564           if(expr_data->map) {
565             rasqal_free_map(expr_data->map);
566             expr_data->map = NULL;
567           }
568         }
569 
570         break;
571       }
572 
573       /* reference is now in 'row' variable */
574       con->saved_row = NULL;
575 
576 #ifdef RASQAL_DEBUG
577       RASQAL_DEBUG2("Aggregation starting group %d", row->group_id);
578       fputc('\n', DEBUG_FH);
579 #endif
580 
581 
582       /* next time this function is called we continue here */
583 
584       for(i = 0; i < con->expr_count; i++) {
585         rasqal_agg_expr_data* expr_data = &con->expr_data[i];
586 
587         if(!expr_data->agg_user_data) {
588           /* init once */
589           expr_data->agg_user_data = rasqal_builtin_agg_expression_execute_init(rowsource->world,
590                                                                                 expr_data->expr);
591 
592           if(!expr_data->agg_user_data) {
593             error = 1;
594             break;
595           }
596         }
597 
598         /* Init map for each group */
599         if(expr_data->expr->flags & RASQAL_EXPR_FLAG_DISTINCT) {
600           expr_data->map = rasqal_new_literal_sequence_sort_map(1 /* is_distinct */,
601                                                                 0 /* compare_flags */);
602           if(!expr_data->map) {
603             error = 1;
604             break;
605           }
606         }
607       }
608 
609       if(error)
610         break;
611 
612       con->last_group_id = row->group_id;
613     } /* end if handling change of group ID */
614 
615 
616     /* Bind the values in the input row to the variables in the table */
617     rasqal_row_bind_variables(row, rowsource->query->vars_table);
618 
619     /* Evaluate the expressions giving a sequence of literals to
620      * run the aggregation step over.
621      */
622     if(1) {
623       int i;
624 
625       if(!con->step_count) {
626         /* copy first value row from input rowsource */
627         for(i = 0; i < con->input_values_count; i++) {
628           rasqal_literal* value;
629 
630           value = rasqal_new_literal_from_literal(row->values[i]);
631           raptor_sequence_set_at(con->input_values, i, value);
632         }
633       }
634 
635       con->step_count++;
636 
637       for(i = 0; i < con->expr_count; i++) {
638         rasqal_agg_expr_data* expr_data = &con->expr_data[i];
639         raptor_sequence* seq;
640 
641         /* SPARQL Aggregation uses ListEvalE() to evaluate - ignoring
642          * errors and filtering out expressions that fail
643          */
644         seq = rasqal_expression_sequence_evaluate(rowsource->query,
645                                                   expr_data->exprs_seq,
646                                                   /* ignore_errors */ 1,
647                                                   &error);
648         if(error)
649           continue;
650 
651         if(expr_data->map) {
652           if(rasqal_literal_sequence_sort_map_add_literal_sequence(expr_data->map,
653                                                                    seq)) {
654             /* duplicate found
655              *
656              * The above function just freed seq so no data is lost
657              */
658             continue;
659           }
660         }
661 
662 #ifdef RASQAL_DEBUG
663         RASQAL_DEBUG2("Aggregation expr %d step over literals: ", i);
664         raptor_sequence_print(seq, DEBUG_FH);
665         fputc('\n', DEBUG_FH);
666 #endif
667 
668         error = rasqal_builtin_agg_expression_execute_step(expr_data->agg_user_data,
669                                                            seq);
670         /* when DISTINCTing, seq remains owned by the map
671          * otherwise seq is local and must be freed
672          */
673         if(!expr_data->map)
674           raptor_free_sequence(seq);
675 
676         if(error) {
677           RASQAL_DEBUG2("Aggregation expr %d returned error\n", i);
678           error = 0;
679         }
680       }
681     }
682 
683     rasqal_free_row(row); row = NULL;
684 
685     if(error)
686       break;
687 
688   } /* end while reading rows */
689 
690 
691   if(error) {
692     /* Discard row on error */
693     if(row) {
694       rasqal_free_row(row);
695       row = NULL;
696     }
697   } else if (con->last_group_id >= 0) {
698     int offset = 0;
699     int i;
700 
701     /* Generate result row and reset for next group */
702     row = rasqal_new_row(rowsource);
703 
704     /* Copy scalar results through */
705     for(i = 0; i < con->input_values_count; i++) {
706       rasqal_literal* result;
707 
708       /* Reset: get and delete any stored input rowsource literal */
709       result = (rasqal_literal*)raptor_sequence_delete_at(con->input_values, i);
710 
711       rasqal_row_set_value_at(row, offset, result);
712       rasqal_free_literal(result);
713 
714       offset++;
715     }
716 
717 
718     /* Set aggregate results */
719     for(i = 0; i < con->expr_count; i++) {
720       rasqal_literal* result;
721       rasqal_agg_expr_data* expr_data = &con->expr_data[i];
722       rasqal_variable* v;
723 
724       /* Calculate the result because the input ended or a new group started */
725       result = rasqal_builtin_agg_expression_execute_result(expr_data->agg_user_data);
726 
727 #ifdef RASQAL_DEBUG
728       RASQAL_DEBUG2("Aggregation %d ending group with result: ", i);
729       rasqal_literal_print(result, DEBUG_FH);
730       fputc('\n', DEBUG_FH);
731 #endif
732 
733       v = rasqal_rowsource_get_variable_by_offset(rowsource, offset);
734       result = rasqal_new_literal_from_literal(result);
735       /* it is OK to bind to NULL */
736       rasqal_variable_set_value(v, result);
737 
738       rasqal_row_set_value_at(row, offset, result);
739 
740       if(result)
741         rasqal_free_literal(result);
742 
743       offset++;
744 
745       if(rasqal_builtin_agg_expression_execute_reset(expr_data->agg_user_data)) {
746         rasqal_free_row(row);
747         row = NULL;
748         break;
749       }
750     }
751 
752     con->step_count = 0;
753 
754     if(row)
755       row->offset = con->offset++;
756   }
757 
758 
759   return row;
760 }
761 
762 
763 static rasqal_rowsource*
rasqal_aggregation_rowsource_get_inner_rowsource(rasqal_rowsource * rowsource,void * user_data,int offset)764 rasqal_aggregation_rowsource_get_inner_rowsource(rasqal_rowsource* rowsource,
765                                                  void *user_data, int offset)
766 {
767   rasqal_aggregation_rowsource_context *con;
768   con = (rasqal_aggregation_rowsource_context*)user_data;
769 
770   if(offset == 0)
771     return con->rowsource;
772 
773   return NULL;
774 }
775 
776 
777 static const rasqal_rowsource_handler rasqal_aggregation_rowsource_handler = {
778   /* .version = */ 1,
779   "aggregation",
780   /* .init = */ rasqal_aggregation_rowsource_init,
781   /* .finish = */ rasqal_aggregation_rowsource_finish,
782   /* .ensure_variables = */ rasqal_aggregation_rowsource_ensure_variables,
783   /* .read_row = */ rasqal_aggregation_rowsource_read_row,
784   /* .read_all_rows = */ NULL,
785   /* .reset = */ NULL,
786   /* .set_requirements = */ NULL,
787   /* .get_inner_rowsource = */ rasqal_aggregation_rowsource_get_inner_rowsource,
788   /* .set_origin = */ NULL,
789 };
790 
791 
792 /**
793  * rasqal_new_aggregation_rowsource:
794  * @world: world
795  * @query: query
796  * @rowsource: input (grouped) rowsource - typically constructed by rasqal_new_groupby_rowsource()
797  * @exprs_seq: sequence of #rasqal_expression
798  * @vars_seq: sequence of #rasqal_variable to bind in output rows
799  *
800  * INTERNAL - Create a new rowsource for a aggregration
801  *
802  * The @rowsource becomes owned by the new rowsource.  The @exprs_seq
803  * and @vars_seq are not.
804  *
805  * For example with the SPARQL 1.1 example queries
806  *
807  * SELECT (MAX(?y) AS ?agg) WHERE { ?x ?y ?z } GROUP BY ?x
808  * the aggregation part corresponds to
809  *   exprs_seq : [ expr MAX with sequence of expression args [?y] }
810  *   vars_seq  : [ {internal variable name} ]
811  *
812  * SELECT (ex:agg(?y, ?z) AS ?agg) WHERE { ?x ?y ?z } GROUP BY ?x
813  * the aggregation part corresponds to
814  *   exprs_seq : [ expr ex:agg with sequence of expression args [?y, ?z] ]
815  *   vars_seq  : [ {internal variable name} ]
816  *
817  * SELECT ?x, (MIN(?z) AS ?agg) WHERE { ?x ?y ?z } GROUP BY ?x
818  * the aggregation part corresponds to
819  *   exprs_seq : [ non-aggregate expression ?x,
820  *                 expr MIN with sequence of expression args [?z] ]
821  *   vars_seq  : [ ?x, {internal variable name} ]
822  *
823  * Return value: new rowsource or NULL on failure
824 */
825 
826 rasqal_rowsource*
rasqal_new_aggregation_rowsource(rasqal_world * world,rasqal_query * query,rasqal_rowsource * rowsource,raptor_sequence * exprs_seq,raptor_sequence * vars_seq)827 rasqal_new_aggregation_rowsource(rasqal_world *world, rasqal_query* query,
828                                  rasqal_rowsource* rowsource,
829                                  raptor_sequence* exprs_seq,
830                                  raptor_sequence* vars_seq)
831 {
832   rasqal_aggregation_rowsource_context* con = NULL;
833   int flags = 0;
834   int size;
835   int i;
836 
837   if(!world || !query || !rowsource || !exprs_seq || !vars_seq)
838     goto fail;
839 
840   exprs_seq = rasqal_expression_copy_expression_sequence(exprs_seq);
841   vars_seq = rasqal_variable_copy_variable_sequence(vars_seq);
842 
843   size = raptor_sequence_size(exprs_seq);
844   if(size != raptor_sequence_size(vars_seq)) {
845     RASQAL_DEBUG3("expressions sequence size %d does not match vars sequence size %d\n", size, raptor_sequence_size(vars_seq));
846     goto fail;
847   }
848 
849 
850   con = RASQAL_CALLOC(rasqal_aggregation_rowsource_context*, 1, sizeof(*con));
851   if(!con)
852     goto fail;
853 
854   con->rowsource = rowsource;
855 
856   con->exprs_seq = exprs_seq;
857   con->vars_seq = vars_seq;
858 
859   /* allocate per-expr data */
860   con->expr_count = size;
861   con->expr_data = RASQAL_CALLOC(rasqal_agg_expr_data*, RASQAL_GOOD_CAST(size_t, size),
862                                  sizeof(rasqal_agg_expr_data));
863   if(!con->expr_data)
864     goto fail;
865 
866   /* Initialise per-expr data */
867   for(i = 0; i < size; i++) {
868     rasqal_expression* expr = (rasqal_expression *)raptor_sequence_get_at(exprs_seq, i);
869     rasqal_variable* variable = (rasqal_variable*)raptor_sequence_get_at(vars_seq, i);
870     rasqal_agg_expr_data* expr_data = &con->expr_data[i];
871 
872     expr_data->expr = rasqal_new_expression_from_expression(expr);
873     expr_data->variable = variable;
874 
875     /* Prepare expression arguments sequence in per-expr data */
876     if(expr->args) {
877       /* list of #rasqal_expression arguments already in expr
878        * #RASQAL_EXPR_FUNCTION and #RASQAL_EXPR_GROUP_CONCAT
879        */
880       expr_data->exprs_seq = rasqal_expression_copy_expression_sequence(expr->args);
881     } else {
882       /* single argument */
883 
884       expr_data->exprs_seq = raptor_new_sequence((raptor_data_free_handler)rasqal_free_expression,
885                                                (raptor_data_print_handler)rasqal_expression_print);
886       raptor_sequence_push(expr_data->exprs_seq,
887                            rasqal_new_expression_from_expression(expr->arg1));
888     }
889   }
890 
891 
892   return rasqal_new_rowsource_from_handler(world, query,
893                                            con,
894                                            &rasqal_aggregation_rowsource_handler,
895                                            query->vars_table,
896                                            flags);
897 
898   fail:
899 
900   if(rowsource)
901     rasqal_free_rowsource(rowsource);
902   if(exprs_seq)
903     raptor_free_sequence(exprs_seq);
904   if(vars_seq)
905     raptor_free_sequence(vars_seq);
906   if(con)
907     RASQAL_FREE(rasqal_aggregation_rowsource_context*, con);
908 
909   return NULL;
910 }
911 
912 #endif /* not STANDALONE */
913 
914 
915 
916 #ifdef STANDALONE
917 
918 /* one more prototype */
919 int main(int argc, char *argv[]);
920 
921 
/* number of entries in the test_data table */
#define AGGREGATION_TESTS_COUNT 6

/* size of the expr_agg_vars array of each test */
#define MAX_TEST_VARS 3
926 
/* Input data shared by the tests: 3 variables (x, y, z) and 3 rows.
 * Each value is followed by a NULL; a row of NULLs ends the data.
 */
static const char* const data_xyz_3_rows[] =
{
  /* variable names */
  "x", NULL,  "y", NULL,  "z", NULL,

  /* row 1 */
  "1", NULL,  "2", NULL,  "3", NULL,

  /* row 2 */
  "1", NULL,  "3", NULL,  "4", NULL,

  /* row 3 */
  "2", NULL,  "5", NULL,  "6", NULL,

  /* end of data marker */
  NULL, NULL,  NULL, NULL,  NULL, NULL,
};
941 
/* Expected results, one value per output row (group) */

/* test 0: MAX(?y) GROUP BY ?x */
static const int test0_output_rows[] = { 3, 5, };

/* test 1: MIN(?x) GROUP BY ?x */
static const int test1_output_rows[] = { 1, 2, };

/* test 2: SUM(?z) GROUP BY ?x */
static const int test2_output_rows[] = { 7, 6, };

/* test 3: AVG(?x) GROUP BY ?x */
static const double test3_output_rows[] = { 1.0, 2.0, };

/* test 4: SAMPLE(?y) GROUP BY ?x */
static const int test4_output_rows[] = { 2, 5, };

/* test 5: GROUP_CONCAT(?z) GROUP BY ?x */
static const char* const test5_output_rows[] = { "3 4", "6", };
960 
961 
/* Group IDs expected on the input rows, one per row of the test 0 data */
static const int test0_groupids[] = { 0, 0, 1 };
967 
968 static const struct {
969   int input_vars;
970   int input_rows;
971   int input_ngroups;
972   int output_vars;
973   int output_rows;
974   const char* const *data;
975   const int *group_ids;
976   rasqal_literal_type result_type;
977   const int *result_int_data;
978   const double *result_double_data;
979   const char* const *result_string_data;
980   rasqal_op op;
981   const char* const expr_agg_vars[MAX_TEST_VARS];
982 } test_data[AGGREGATION_TESTS_COUNT] = {
983   /*
984    * Execute the aggregation part of SELECT (MAX(?y) AS ?fake) ... GROUP BY ?x
985    *   Input 3 vars (x, y, z), 3 rows and 2 groups.
986    *   Output is 1 var (fake), 2 rows (1 per input group)
987    * Expected result: [ ?fake => 3, ?fake => 5]
988    */
989   {3, 3, 2, 1, 2, data_xyz_3_rows, test0_groupids,
990    RASQAL_LITERAL_INTEGER, test0_output_rows, NULL,
991    NULL,
992    RASQAL_EXPR_MAX, { "y" } },
993 
994   /*
995    * Execute the aggregation part of SELECT (MIN(?x) AS ?fake) ... GROUP BY ?x
996    * Expected result: [ ?fake => 1, ?fake => 2]
997    */
998   {3, 3, 2, 1, 2, data_xyz_3_rows, test0_groupids,
999    RASQAL_LITERAL_INTEGER, test1_output_rows, NULL,
1000    NULL,
1001    RASQAL_EXPR_MIN, { "x" } },
1002 
1003   /*
1004    * Execute the aggregation part of SELECT (SUM(?z) AS ?fake) ... GROUP BY ?x
1005    * Expected result: [ ?fake => 7, ?fake => 6]
1006    */
1007   {3, 3, 2, 1, 2, data_xyz_3_rows, test0_groupids,
1008    RASQAL_LITERAL_INTEGER, test2_output_rows, NULL,
1009    NULL,
1010    RASQAL_EXPR_SUM, { "z" } },
1011 
1012   /*
1013    * Execute the aggregation part of SELECT (AVG(?x) AS ?fake) ... GROUP BY ?x
1014    * Expected result: [ ?fake => 1.0, ?fake => 2.0]
1015    */
1016   {3, 3, 2, 1, 2, data_xyz_3_rows, test0_groupids,
1017    RASQAL_LITERAL_DECIMAL, NULL, test3_output_rows,
1018    NULL,
1019    RASQAL_EXPR_AVG, { "x" } },
1020 
1021   /*
1022    * Execute the aggregation part of SELECT (SAMPLE(?y) AS ?fake) ... GROUP BY ?x
1023    * Expected result: [ ?fake => 2, ?fake => 5]
1024    */
1025   {3, 3, 2, 1, 2, data_xyz_3_rows, test0_groupids,
1026    RASQAL_LITERAL_INTEGER, test4_output_rows, NULL,
1027    NULL,
1028    RASQAL_EXPR_SAMPLE, { "y" } },
1029 
1030   /*
1031    * Execute the aggregation part of SELECT (GROUP_CONCAT(?z) AS ?fake) ... GROUP BY ?x
1032    * Expected result: [ ?fake => "3 4", ?fake => "6"]
1033    */
1034   {3, 3, 2, 1, 2, data_xyz_3_rows, test0_groupids,
1035    RASQAL_LITERAL_INTEGER, NULL, NULL,
1036    test5_output_rows,
1037    RASQAL_EXPR_GROUP_CONCAT, { "z" } }
1038 };
1039 
1040 
1041 static rasqal_expression*
make_test_expr(rasqal_world * world,raptor_sequence * expr_vars_seq,rasqal_op op)1042 make_test_expr(rasqal_world* world,
1043                raptor_sequence* expr_vars_seq,
1044                rasqal_op op)
1045 {
1046   if(op == RASQAL_EXPR_MAX ||
1047      op == RASQAL_EXPR_MIN ||
1048      op == RASQAL_EXPR_SUM ||
1049      op == RASQAL_EXPR_AVG ||
1050      op == RASQAL_EXPR_SAMPLE) {
1051     rasqal_expression* arg1;
1052 
1053     arg1 = (rasqal_expression*)raptor_sequence_delete_at(expr_vars_seq, 0);
1054     raptor_free_sequence(expr_vars_seq);
1055 
1056     return rasqal_new_aggregate_function_expression(world, op,
1057                                                     arg1,
1058                                                     /* params */ NULL,
1059                                                     /* flags */ 0);
1060   }
1061 
1062   if(op == RASQAL_EXPR_GROUP_CONCAT) {
1063     return rasqal_new_group_concat_expression(world,
1064                                               /* flags */ 0,
1065                                               expr_vars_seq,
1066                                               /* separator */ NULL);
1067   }
1068 
1069   return NULL;
1070 }
1071 
1072 
1073 int
main(int argc,char * argv[])1074 main(int argc, char *argv[])
1075 {
1076   const char *program = rasqal_basename(argv[0]);
1077   rasqal_rowsource *rowsource = NULL;
1078   rasqal_world* world = NULL;
1079   rasqal_query* query = NULL;
1080   raptor_sequence* row_seq = NULL;
1081   raptor_sequence* expr_args_seq = NULL;
1082   int failures = 0;
1083   rasqal_variables_table* vt;
1084   rasqal_rowsource *input_rs = NULL;
1085   raptor_sequence* vars_seq = NULL;
1086   raptor_sequence* exprs_seq = NULL;
1087   int test_id;
1088 
1089   world = rasqal_new_world();
1090   if(!world || rasqal_world_open(world)) {
1091     fprintf(stderr, "%s: rasqal_world init failed\n", program);
1092     return(1);
1093   }
1094 
1095   query = rasqal_new_query(world, "sparql", NULL);
1096 
1097   vt = query->vars_table;
1098 
1099   for(test_id = 0; test_id < AGGREGATION_TESTS_COUNT; test_id++) {
1100     int input_vars_count = test_data[test_id].input_vars;
1101     int output_rows_count = test_data[test_id].output_rows;
1102     int output_vars_count = test_data[test_id].output_vars;
1103     const int* input_group_ids = test_data[test_id].group_ids;
1104     rasqal_literal_type expected_type = test_data[test_id].result_type;
1105     const int* result_int_data = test_data[test_id].result_int_data;
1106     const double* result_double_data = test_data[test_id].result_double_data;
1107     const char* const* result_string_data = test_data[test_id].result_string_data;
1108     rasqal_op op  = test_data[test_id].op;
1109     raptor_sequence* seq = NULL;
1110     int count;
1111     int size;
1112     int i;
1113     #define OUT_VAR_NAME_LEN 4
1114     const char* output_var_name = "fake";
1115     rasqal_variable* output_var;
1116     rasqal_expression* expr;
1117     int output_row_size = (input_vars_count + output_vars_count);
1118 
1119     if(output_vars_count != 1) {
1120       fprintf(stderr,
1121               "%s: test %d expects %d variables which is not supported. Test skipped\n",
1122               program, test_id, output_vars_count);
1123       failures++;
1124       goto tidy;
1125     }
1126 
1127     row_seq = rasqal_new_row_sequence(world, vt, test_data[test_id].data,
1128                                       test_data[test_id].input_vars, &vars_seq);
1129     if(row_seq) {
1130       for(i = 0; i < test_data[test_id].input_rows; i++) {
1131         rasqal_row* row = (rasqal_row*)raptor_sequence_get_at(row_seq, i);
1132         row->group_id = input_group_ids[i];
1133       }
1134 
1135       input_rs = rasqal_new_rowsequence_rowsource(world, query, vt,
1136                                                   row_seq, vars_seq);
1137       /* vars_seq and row_seq are now owned by input_rs */
1138       vars_seq = row_seq = NULL;
1139     }
1140     if(!input_rs) {
1141       fprintf(stderr, "%s: failed to create rowsequence rowsource\n", program);
1142       failures++;
1143       goto tidy;
1144     }
1145 
1146     expr_args_seq = raptor_new_sequence((raptor_data_free_handler)rasqal_free_expression,
1147                                         (raptor_data_print_handler)rasqal_expression_print);
1148 
1149     if(test_data[test_id].expr_agg_vars[0] != NULL) {
1150       int vindex;
1151       const unsigned char* var_name;
1152 
1153       for(vindex = 0;
1154           (var_name = RASQAL_GOOD_CAST(const unsigned char*, test_data[test_id].expr_agg_vars[vindex] ));
1155           vindex++) {
1156         rasqal_variable* v;
1157         rasqal_literal *l = NULL;
1158         rasqal_expression* e = NULL;
1159 
1160         v = rasqal_variables_table_get_by_name(vt, RASQAL_VARIABLE_TYPE_NORMAL,
1161                                                var_name);
1162         /* returns SHARED pointer to variable */
1163         if(v) {
1164           v = rasqal_new_variable_from_variable(v);
1165           l = rasqal_new_variable_literal(world, v);
1166         }
1167 
1168         if(l)
1169           e = rasqal_new_literal_expression(world, l);
1170 
1171         if(e)
1172           raptor_sequence_push(expr_args_seq, e);
1173         else {
1174           fprintf(stderr, "%s: failed to create variable %s\n", program,
1175                   RASQAL_GOOD_CAST(const char*, var_name));
1176           failures++;
1177           goto tidy;
1178         }
1179 
1180       }
1181     } /* if vars */
1182 
1183 
1184     output_var = rasqal_variables_table_add2(vt, RASQAL_VARIABLE_TYPE_ANONYMOUS,
1185                                              RASQAL_GOOD_CAST(const unsigned char*, output_var_name),
1186                                              OUT_VAR_NAME_LEN, NULL);
1187     expr = make_test_expr(world, expr_args_seq, op);
1188     /* expr_args_seq is now owned by expr */
1189     expr_args_seq = NULL;
1190 
1191     exprs_seq = raptor_new_sequence((raptor_data_free_handler)rasqal_free_expression,
1192                                     (raptor_data_print_handler)rasqal_expression_print);
1193     raptor_sequence_push(exprs_seq, expr);
1194     /* expr is now owned by exprs_seq */
1195     expr = NULL;
1196 
1197     vars_seq = raptor_new_sequence((raptor_data_free_handler)rasqal_free_variable,
1198                                    (raptor_data_print_handler)rasqal_variable_print);
1199     raptor_sequence_push(vars_seq, output_var);
1200     /* output_var is now owned by vars_seq */
1201     output_var = NULL;
1202 
1203     rowsource = rasqal_new_aggregation_rowsource(world, query, input_rs,
1204                                                  exprs_seq, vars_seq);
1205     /* input_rs is now owned by rowsource */
1206     input_rs = NULL;
1207     /* these are no longer needed; agg rowsource made copies */
1208     raptor_free_sequence(exprs_seq); exprs_seq = NULL;
1209     raptor_free_sequence(vars_seq); vars_seq = NULL;
1210 
1211     if(!rowsource) {
1212       fprintf(stderr, "%s: failed to create aggregation rowsource\n", program);
1213       failures++;
1214       goto tidy;
1215     }
1216 
1217 
1218     /* Test the rowsource */
1219     seq = rasqal_rowsource_read_all_rows(rowsource);
1220     if(!seq) {
1221       fprintf(stderr,
1222               "%s: test %d rasqal_rowsource_read_all_rows() returned a NULL seq for a aggregation rowsource\n",
1223               program, test_id);
1224       failures++;
1225       goto tidy;
1226     }
1227     count = raptor_sequence_size(seq);
1228     if(count != output_rows_count) {
1229       fprintf(stderr,
1230               "%s: test %d rasqal_rowsource_read_all_rows() returned %d rows for a aggregation rowsource, expected %d\n",
1231               program, test_id, count, output_rows_count);
1232       failures++;
1233       goto tidy;
1234     }
1235 
1236     size = rasqal_rowsource_get_size(rowsource);
1237     if(size != output_row_size) {
1238       fprintf(stderr,
1239               "%s: test %d rasqal_rowsource_get_size() returned %d columns (variables) for a aggregation rowsource, expected %d\n",
1240               program, test_id, size, output_row_size);
1241       failures++;
1242       goto tidy;
1243     }
1244 
1245     if(result_int_data || result_double_data) {
1246       for(i = 0; i < output_rows_count; i++) {
1247         rasqal_row* row = (rasqal_row*)raptor_sequence_get_at(seq, i);
1248         rasqal_literal* value;
1249         int vc;
1250 
1251         if(row->size != output_row_size) {
1252           fprintf(stderr,
1253                   "%s: test %d row #%d is size %d expected %d\n",
1254                   program, test_id, i, row->size, output_row_size);
1255           failures++;
1256           goto tidy;
1257         }
1258 
1259         /* Expected variable ordering in output row is:
1260          * {input vars} {output_vars}
1261          */
1262         for(vc = 0; vc < output_vars_count; vc++) {
1263           rasqal_variable* row_var;
1264           int offset = input_vars_count + vc;
1265 
1266           row_var = rasqal_rowsource_get_variable_by_offset(rowsource, offset);
1267           value = row->values[offset];
1268 
1269           if(!value) {
1270             fprintf(stderr,
1271                     "%s: test %d row #%d %s value #%d result is NULL\n",
1272                     program, test_id, i, row_var->name, vc);
1273             failures++;
1274             goto tidy;
1275           }
1276 
1277           if(value->type != expected_type) {
1278             fprintf(stderr,
1279                     "%s: test %d row #%d %s value #%d result is type %s expected %s\n",
1280                     program, test_id, i, row_var->name, vc,
1281                     rasqal_literal_type_label(value->type),
1282                     rasqal_literal_type_label(expected_type));
1283             failures++;
1284             goto tidy;
1285           }
1286 
1287           if(expected_type == RASQAL_LITERAL_INTEGER) {
1288             int expected_integer = result_int_data[i];
1289             int integer;
1290 
1291             integer = rasqal_literal_as_integer(value, NULL);
1292 
1293             if(integer != expected_integer) {
1294               fprintf(stderr,
1295                     "%s: test %d row #%d %s value #%d result is %d expected %d\n",
1296                       program, test_id, i, row_var->name, vc,
1297                       integer, expected_integer);
1298               failures++;
1299               goto tidy;
1300             }
1301           } else if(expected_type == RASQAL_LITERAL_DECIMAL) {
1302             double expected_double = result_double_data[i];
1303             double d;
1304 
1305             d = rasqal_literal_as_double(value, NULL);
1306 
1307             if(!rasqal_double_approximately_equal(d, expected_double)) {
1308               fprintf(stderr,
1309                     "%s: test %d row #%d %s value #%d result is %f expected %f\n",
1310                       program, test_id, i, row_var->name, vc,
1311                       d, expected_double);
1312               failures++;
1313               goto tidy;
1314             }
1315           }
1316 
1317         }
1318 
1319       }
1320     }
1321 
1322     if(result_string_data) {
1323       for(i = 0; i < output_rows_count; i++) {
1324         rasqal_row* row = (rasqal_row*)raptor_sequence_get_at(seq, i);
1325         rasqal_literal* value;
1326         const unsigned char* str;
1327         const char* expected_string = result_string_data[i];
1328         int vc;
1329 
1330         if(row->size != output_row_size) {
1331           fprintf(stderr,
1332                   "%s: test %d row #%d is size %d expected %d\n",
1333                   program, test_id, i, row->size, output_row_size);
1334           failures++;
1335           goto tidy;
1336         }
1337 
1338         /* Expected variable ordering in output row is:
1339          * {input vars} {output_vars}
1340          */
1341         for(vc = 0; vc < output_vars_count; vc++) {
1342           rasqal_variable* row_var;
1343           int offset = input_vars_count + vc;
1344 
1345           row_var = rasqal_rowsource_get_variable_by_offset(rowsource, offset);
1346           value = row->values[offset];
1347 
1348           if(!value) {
1349             fprintf(stderr,
1350                     "%s: test %d row #%d %s value #%d result is NULL\n",
1351                     program, test_id, i, row_var->name, vc);
1352             failures++;
1353             goto tidy;
1354           }
1355 
1356           if(value->type != RASQAL_LITERAL_STRING) {
1357             fprintf(stderr,
1358                     "%s: test %d row #%d %s value #%d is type %s expected integer\n",
1359                     program, test_id, i, row_var->name, vc,
1360                     rasqal_literal_type_label(value->type));
1361             failures++;
1362             goto tidy;
1363           }
1364 
1365           str = rasqal_literal_as_string(value);
1366 
1367           if(strcmp(RASQAL_GOOD_CAST(const char*, str), expected_string)) {
1368             fprintf(stderr,
1369                     "%s: test %d row #%d %s value #%d is %s expected %s\n",
1370                     program, test_id, i, row_var->name, vc,
1371                     str, expected_string);
1372             failures++;
1373             goto tidy;
1374           }
1375         }
1376 
1377       }
1378     }
1379 
1380 
1381 #ifdef RASQAL_DEBUG
1382     rasqal_rowsource_print_row_sequence(rowsource, seq, stderr);
1383 #endif
1384 
1385     raptor_free_sequence(seq); seq = NULL;
1386 
1387     rasqal_free_rowsource(rowsource); rowsource = NULL;
1388 
1389     if(expr_args_seq)
1390       raptor_free_sequence(expr_args_seq);
1391     expr_args_seq = NULL;
1392   }
1393 
1394 
1395   tidy:
1396   if(exprs_seq)
1397     raptor_free_sequence(exprs_seq);
1398   if(vars_seq)
1399     raptor_free_sequence(vars_seq);
1400   if(expr_args_seq)
1401     raptor_free_sequence(expr_args_seq);
1402   if(rowsource)
1403     rasqal_free_rowsource(rowsource);
1404   if(input_rs)
1405     rasqal_free_rowsource(input_rs);
1406   if(query)
1407     rasqal_free_query(query);
1408   if(world)
1409     rasqal_free_world(world);
1410 
1411   return failures;
1412 }
1413 
1414 #endif /* STANDALONE */
1415