1 #include "dplyr.h"
2
3 namespace dplyr {
4
stop_summarise_unsupported_type(SEXP result)5 void stop_summarise_unsupported_type(SEXP result) {
6 DPLYR_ERROR_INIT(1);
7 DPLYR_ERROR_SET(0, "result", result);
8 DPLYR_ERROR_MESG_INIT(0);
9 DPLYR_ERROR_THROW("dplyr:::summarise_unsupported_type");
10 }
11
stop_summarise_mixed_null()12 void stop_summarise_mixed_null() {
13 DPLYR_ERROR_INIT(0);
14 DPLYR_ERROR_MESG_INIT(0);
15 DPLYR_ERROR_THROW("dplyr:::summarise_mixed_null");
16 }
17
stop_summarise_incompatible_size(int index_group,int index_expression,int expected_size,int size)18 void stop_summarise_incompatible_size(int index_group, int index_expression, int expected_size, int size) {
19 DPLYR_ERROR_INIT(4);
20 DPLYR_ERROR_SET(0, "group", Rf_ScalarInteger(index_group + 1));
21 DPLYR_ERROR_SET(1, "index", Rf_ScalarInteger(index_expression + 1));
22 DPLYR_ERROR_SET(2, "expected_size", Rf_ScalarInteger(expected_size));
23 DPLYR_ERROR_SET(3, "size", Rf_ScalarInteger(size));
24 DPLYR_ERROR_MESG_INIT(0);
25 DPLYR_ERROR_THROW("dplyr:::summarise_incompatible_size");
26 }
27
28 }
29
30
dplyr_mask_eval_all_summarise(SEXP quo,SEXP env_private)31 SEXP dplyr_mask_eval_all_summarise(SEXP quo, SEXP env_private) {
32 DPLYR_MASK_INIT();
33
34 R_xlen_t n_null = 0;
35 SEXP chunks = PROTECT(Rf_allocVector(VECSXP, ngroups));
36 for (R_xlen_t i = 0; i < ngroups; i++) {
37 DPLYR_MASK_SET_GROUP(i);
38
39 SEXP result_i = PROTECT(DPLYR_MASK_EVAL(quo));
40 SET_VECTOR_ELT(chunks, i, result_i);
41
42 if (result_i == R_NilValue) {
43 n_null++;
44 } else if (!vctrs::vec_is_vector(result_i)) {
45 dplyr::stop_summarise_unsupported_type(result_i);
46 }
47
48 UNPROTECT(1);
49 }
50 DPLYR_MASK_FINALISE();
51 UNPROTECT(1);
52
53 if (n_null == ngroups) {
54 return R_NilValue;
55 } else if (n_null != 0) {
56 dplyr::stop_summarise_mixed_null();
57 }
58
59 return chunks;
60 }
61
is_useful_chunk(SEXP ptype)62 bool is_useful_chunk(SEXP ptype) {
63 return !Rf_inherits(ptype, "data.frame") || XLENGTH(ptype) > 0;
64 }
65
dplyr_summarise_recycle_chunks(SEXP chunks,SEXP rows,SEXP ptypes)66 SEXP dplyr_summarise_recycle_chunks(SEXP chunks, SEXP rows, SEXP ptypes) {
67 R_len_t n_chunks = LENGTH(chunks);
68 R_len_t n_groups = LENGTH(rows);
69
70 SEXP res = PROTECT(Rf_allocVector(VECSXP, 2));
71 Rf_namesgets(res, dplyr::vectors::names_summarise_recycle_chunks);
72 SET_VECTOR_ELT(res, 0, chunks);
73
74 SEXP useful = PROTECT(Rf_allocVector(LGLSXP, n_chunks));
75 int* p_useful = LOGICAL(useful);
76 int n_useful = 0;
77 const SEXP* p_ptypes = VECTOR_PTR_RO(ptypes);
78 for (R_len_t j = 0; j < n_chunks; j++) {
79 n_useful += p_useful[j] = is_useful_chunk(p_ptypes[j]);
80 }
81
82 // early exit if there are no useful chunks, this includes
83 // when there are no chunks at all
84 if (n_useful == 0) {
85 SET_VECTOR_ELT(res, 1, Rf_ScalarInteger(1));
86 UNPROTECT(2);
87 return res;
88 }
89
90 bool all_one = true;
91 int k = 1;
92 SEXP sizes = PROTECT(Rf_allocVector(INTSXP, n_groups));
93 int* p_sizes = INTEGER(sizes);
94 const SEXP* p_chunks = VECTOR_PTR_RO(chunks);
95 for (R_xlen_t i = 0; i < n_groups; i++, ++p_sizes) {
96 R_len_t n_i = 1;
97
98 R_len_t j = 0;
99 for (; j < n_chunks; j++) {
100 // skip useless chunks before looking for chunk size
101 for (; j < n_chunks && !p_useful[j]; j++);
102 if (j == n_chunks) break;
103
104 R_len_t n_i_j = vctrs::short_vec_size(VECTOR_ELT(p_chunks[j], i));
105
106 if (n_i != n_i_j) {
107 if (n_i == 1) {
108 n_i = n_i_j;
109 } else if (n_i_j != 1) {
110 dplyr::stop_summarise_incompatible_size(i, j, n_i, n_i_j);
111 }
112 }
113 }
114
115 k = k + n_i;
116 *p_sizes = n_i;
117 if (n_i != 1) {
118 all_one = false;
119 }
120 }
121
122 if (all_one) {
123 SET_VECTOR_ELT(res, 1, Rf_ScalarInteger(1));
124 } else {
125 // perform recycling
126 for (int j = 0; j < n_chunks; j++){
127 // skip useless chunks before recycling
128 for (; j < n_chunks && !p_useful[j]; j++);
129 if (j == n_chunks) break;
130
131 SEXP chunks_j = p_chunks[j];
132 int* p_sizes = INTEGER(sizes);
133 for (int i = 0; i < n_groups; i++, ++p_sizes) {
134 SET_VECTOR_ELT(chunks_j, i,
135 vctrs::short_vec_recycle(VECTOR_ELT(chunks_j, i), *p_sizes)
136 );
137 }
138 }
139 SET_VECTOR_ELT(res, 0, chunks);
140 SET_VECTOR_ELT(res, 1, sizes);
141 }
142
143 UNPROTECT(3);
144 return res;
145 }
146
dplyr_extract_chunks(SEXP df_list,SEXP df_ptype)147 SEXP dplyr_extract_chunks(SEXP df_list, SEXP df_ptype) {
148 R_xlen_t n_columns = XLENGTH(df_ptype);
149 R_xlen_t n_rows = XLENGTH(df_list);
150
151 const SEXP* p_df_list = VECTOR_PTR_RO(df_list);
152
153 SEXP out = PROTECT(Rf_allocVector(VECSXP, n_columns));
154 for (R_xlen_t i = 0; i < n_columns; i++) {
155 SEXP out_i = PROTECT(Rf_allocVector(VECSXP, n_rows));
156 for (R_xlen_t j = 0; j < n_rows; j++) {
157 SET_VECTOR_ELT(out_i, j, VECTOR_ELT(p_df_list[j], i));
158 }
159 SET_VECTOR_ELT(out, i, out_i);
160 UNPROTECT(1);
161 }
162 Rf_namesgets(out, Rf_getAttrib(df_ptype, R_NamesSymbol));
163 UNPROTECT(1);
164 return out;
165 }
166
167
168