1 #include "data.table.h"  // first (before Rdefines.h) for clang-13-omp, #5122
2 #include <Rdefines.h>
3 
coerceToRealListR(SEXP obj)4 SEXP coerceToRealListR(SEXP obj) {
5   // accept atomic/list of integer/logical/real returns list of real
6   int protecti = 0;
7   SEXP x = R_NilValue;
8   if (isVectorAtomic(obj)) {
9     x = PROTECT(allocVector(VECSXP, 1)); protecti++;
10     if (isReal(obj)) {
11       SET_VECTOR_ELT(x, 0, obj);
12     } else if (isInteger(obj) || isLogical(obj)) {
13       SET_VECTOR_ELT(x, 0, coerceVector(obj, REALSXP));
14     } else {
15       error(_("x must be of type numeric or logical"));
16     }
17   } else {
18     R_len_t nobj = length(obj);
19     x = PROTECT(allocVector(VECSXP, nobj)); protecti++;
20     for (R_len_t i=0; i<nobj; i++) {
21       if (isReal(VECTOR_ELT(obj, i))) {
22         SET_VECTOR_ELT(x, i, VECTOR_ELT(obj, i));
23       } else if (isInteger(VECTOR_ELT(obj, i)) || isLogical(VECTOR_ELT(obj, i))) {
24         SET_VECTOR_ELT(x, i, coerceVector(VECTOR_ELT(obj, i), REALSXP));
25       } else {
26         error(_("x must be list, data.frame or data.table of numeric or logical types"));
27       }
28     }
29   }
30   UNPROTECT(protecti);
31   return x;
32 }
33 
frollfunR(SEXP fun,SEXP obj,SEXP k,SEXP fill,SEXP algo,SEXP align,SEXP narm,SEXP hasna,SEXP adaptive)34 SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEXP narm, SEXP hasna, SEXP adaptive) {
35   int protecti = 0;
36   const bool verbose = GetVerbose();
37 
38   if (!xlength(obj))
39     return(obj);                                                // empty input: NULL, list()
40   double tic = 0;
41   if (verbose)
42     tic = omp_get_wtime();
43   SEXP x = PROTECT(coerceToRealListR(obj)); protecti++;
44   R_len_t nx=length(x);                                         // number of columns to roll on
45 
46   if (xlength(k) == 0)                                          // check that window is non zero length
47     error(_("n must be non 0 length"));
48 
49   if (!isLogical(adaptive) || length(adaptive) != 1 || LOGICAL(adaptive)[0] == NA_LOGICAL)
50     error(_("adaptive must be TRUE or FALSE"));
51   bool badaptive = LOGICAL(adaptive)[0];
52 
53   R_len_t nk = 0;                                               // number of rolling windows, for adaptive might be atomic to be wrapped into list, 0 for clang -Wall
54   SEXP ik = R_NilValue;                                         // holds integer window width, if doing non-adaptive roll fun
55   SEXP kl = R_NilValue;                                         // holds adaptive window width, if doing adaptive roll fun
56   if (!badaptive) {                                             // validating n input for adaptive=FALSE
57     if (isNewList(k))
58       error(_("n must be integer, list is accepted for adaptive TRUE"));
59 
60     if (isInteger(k)) {                                        // check that k is integer vector
61       ik = k;
62     } else if (isReal(k)) {                                    // if n is double then convert to integer
63       ik = PROTECT(coerceVector(k, INTSXP)); protecti++;
64     } else {
65       error(_("n must be integer"));
66     }
67 
68     nk = length(k);
69     R_len_t i=0;                                                // check that all window values positive
70     while (i < nk && INTEGER(ik)[i] > 0) i++;
71     if (i != nk)
72       error(_("n must be positive integer values (> 0)"));
73   } else {                                                      // validating n input for adaptive=TRUE
74     if (isVectorAtomic(k)) {                                    // if not-list then wrap into list
75       kl = PROTECT(allocVector(VECSXP, 1)); protecti++;
76       if (isInteger(k)) {                                       // check that k is integer vector
77         SET_VECTOR_ELT(kl, 0, k);
78       } else if (isReal(k)) {                                   // if n is double then convert to integer
79         SET_VECTOR_ELT(kl, 0, coerceVector(k, INTSXP));
80       } else {
81         error(_("n must be integer vector or list of integer vectors"));
82       }
83       nk = 1;
84     } else {
85       nk = length(k);
86       kl = PROTECT(allocVector(VECSXP, nk)); protecti++;
87       for (R_len_t i=0; i<nk; i++) {
88         if (isInteger(VECTOR_ELT(k, i))) {
89           SET_VECTOR_ELT(kl, i, VECTOR_ELT(k, i));
90         } else if (isReal(VECTOR_ELT(k, i))) {                  // coerce double types to integer
91           SET_VECTOR_ELT(kl, i, coerceVector(VECTOR_ELT(k, i), INTSXP));
92         } else {
93           error(_("n must be integer vector or list of integer vectors"));
94         }
95       }
96     }
97   }
98   int **ikl = (int**)R_alloc(nk, sizeof(int*));                 // to not recalculate `length(x[[i]])` we store it in extra array
99   if (badaptive) {
100     for (int j=0; j<nk; j++) ikl[j] = INTEGER(VECTOR_ELT(kl, j));
101   }
102 
103   if (!IS_TRUE_OR_FALSE(narm))
104     error(_("na.rm must be TRUE or FALSE"));
105 
106   if (!isLogical(hasna) || length(hasna)!=1)
107     error(_("hasNA must be TRUE, FALSE or NA"));
108   if (LOGICAL(hasna)[0]==FALSE && LOGICAL(narm)[0])
109     error(_("using hasNA FALSE and na.rm TRUE does not make sense, if you know there are NA values use hasNA TRUE, otherwise leave it as default NA"));
110 
111   int ialign;                                                   // decode align to integer
112   if (!strcmp(CHAR(STRING_ELT(align, 0)), "right"))
113     ialign = 1;
114   else if (!strcmp(CHAR(STRING_ELT(align, 0)), "center"))
115     ialign = 0;
116   else if (!strcmp(CHAR(STRING_ELT(align, 0)), "left"))
117     ialign = -1;
118   else
119     error(_("Internal error: invalid align argument in rolling function, should have been caught before. please report to data.table issue tracker.")); // # nocov
120 
121   if (badaptive && ialign!=1)
122     error(_("using adaptive TRUE and align argument different than 'right' is not implemented"));
123 
124   SEXP ans = PROTECT(allocVector(VECSXP, nk * nx)); protecti++; // allocate list to keep results
125   if (verbose)
126     Rprintf(_("%s: allocating memory for results %dx%d\n"), __func__, nx, nk);
127   ans_t *dans = (ans_t *)R_alloc(nx*nk, sizeof(ans_t));         // answer columns as array of ans_t struct
128   double** dx = (double**)R_alloc(nx, sizeof(double*));         // pointers to source columns
129   uint64_t* inx = (uint64_t*)R_alloc(nx, sizeof(uint64_t));     // to not recalculate `length(x[[i]])` we store it in extra array
130   for (R_len_t i=0; i<nx; i++) {
131     inx[i] = xlength(VECTOR_ELT(x, i));                         // for list input each vector can have different length
132     for (R_len_t j=0; j<nk; j++) {
133       if (badaptive) {                                          // extra input validation
134         if (i > 0 && (inx[i]!=inx[i-1]))                        // variable length list input not allowed for adaptive roll
135           error(_("adaptive rolling function can only process 'x' having equal length of elements, like data.table or data.frame; If you want to call rolling function on list having variable length of elements call it for each field separately"));
136         if (xlength(VECTOR_ELT(kl, j))!=inx[0])                 // check that length of integer vectors in n list match to xrows[0] ([0] and not [i] because there is above check for equal xrows)
137           error(_("length of integer vector(s) provided as list to 'n' argument must be equal to number of observations provided in 'x'"));
138       }
139       SET_VECTOR_ELT(ans, i*nk+j, allocVector(REALSXP, inx[i]));// allocate answer vector for this column-window
140       dans[i*nk+j] = ((ans_t) { .dbl_v=REAL(VECTOR_ELT(ans, i*nk+j)), .status=0, .message={"\0","\0","\0","\0"} });
141     }
142     dx[i] = REAL(VECTOR_ELT(x, i));                             // assign source columns to C pointers
143   }
144 
145   enum {MEAN, SUM} sfun;
146   if (!strcmp(CHAR(STRING_ELT(fun, 0)), "mean")) {
147     sfun = MEAN;
148   } else if (!strcmp(CHAR(STRING_ELT(fun, 0)), "sum")) {
149     sfun = SUM;
150   } else {
151     error(_("Internal error: invalid fun argument in rolling function, should have been caught before. please report to data.table issue tracker.")); // # nocov
152   }
153 
154   if (length(fill) != 1)
155     error(_("fill must be a vector of length 1"));
156 
157   double dfill;
158   if (isInteger(fill)) {
159     if (INTEGER(fill)[0]==NA_LOGICAL) {
160       dfill = NA_REAL;
161     } else {
162       dfill = (double)INTEGER(fill)[0];
163     }
164   } else if (isReal(fill)) {
165     dfill = REAL(fill)[0];
166   } else if (isLogical(fill) && LOGICAL(fill)[0]==NA_LOGICAL){
167     dfill = NA_REAL;
168   } else {
169     error(_("fill must be numeric"));
170   }
171 
172   bool bnarm = LOGICAL(narm)[0];
173 
174   int ihasna =                                                  // plain C tri-state boolean as integer
175     LOGICAL(hasna)[0]==NA_LOGICAL ? 0 :                         // hasna NA, default, no info about NA
176     LOGICAL(hasna)[0]==TRUE ? 1 :                               // hasna TRUE, might be some NAs
177     -1;                                                         // hasna FALSE, there should be no NAs
178 
179   unsigned int ialgo;                                           // decode algo to integer
180   if (!strcmp(CHAR(STRING_ELT(algo, 0)), "fast"))
181     ialgo = 0;                                                  // fast = 0
182   else if (!strcmp(CHAR(STRING_ELT(algo, 0)), "exact"))
183     ialgo = 1;                                                  // exact = 1
184   else
185     error(_("Internal error: invalid algo argument in rolling function, should have been caught before. please report to data.table issue tracker.")); // # nocov
186 
187   int* iik = NULL;
188   if (!badaptive) {
189     if (!isInteger(ik))
190       error(_("Internal error: badaptive=%d but ik is not integer"), badaptive); // # nocov
191     iik = INTEGER(ik);                                          // pointer to non-adaptive window width, still can be vector when doing multiple windows
192   } else {
193     // ik is still R_NilValue from initialization. But that's ok as it's only needed below when !badaptive.
194   }
195 
196   if (verbose) {
197     if (ialgo==0)
198       Rprintf(_("%s: %d column(s) and %d window(s), if product > 1 then entering parallel execution\n"), __func__, nx, nk);
199     else if (ialgo==1)
200       Rprintf(_("%s: %d column(s) and %d window(s), not entering parallel execution here because algo='exact' will compute results in parallel\n"), __func__, nx, nk);
201   }
202   #pragma omp parallel for if (ialgo==0) schedule(dynamic) collapse(2) num_threads(getDTthreads(nx*nk, false))
203   for (R_len_t i=0; i<nx; i++) {                                // loop over multiple columns
204     for (R_len_t j=0; j<nk; j++) {                              // loop over multiple windows
205       switch (sfun) {
206       case MEAN :
207         if (!badaptive)
208           frollmean(ialgo, dx[i], inx[i], &dans[i*nk+j], iik[j], ialign, dfill, bnarm, ihasna, verbose);
209         else
210           fadaptiverollmean(ialgo, dx[i], inx[i], &dans[i*nk+j], ikl[j], dfill, bnarm, ihasna, verbose);
211         break;
212       case SUM :
213         if (!badaptive)
214           frollsum(ialgo, dx[i], inx[i], &dans[i*nk+j], iik[j], ialign, dfill, bnarm, ihasna, verbose);
215         else
216           fadaptiverollsum(ialgo, dx[i], inx[i], &dans[i*nk+j], ikl[j], dfill, bnarm, ihasna, verbose);
217         break;
218       default:
219         error(_("Internal error: Unknown sfun value in froll: %d"), sfun); // #nocov
220       }
221     }
222   }
223 
224   ansMsg(dans, nx*nk, verbose, __func__);                       // raise errors and warnings, as of now messages are not being produced
225 
226   if (verbose)
227     Rprintf(_("%s: processing of %d column(s) and %d window(s) took %.3fs\n"), __func__, nx, nk, omp_get_wtime()-tic);
228 
229   UNPROTECT(protecti);
230   return isVectorAtomic(obj) && length(ans) == 1 ? VECTOR_ELT(ans, 0) : ans;
231 }
232 
frollapplyR(SEXP fun,SEXP obj,SEXP k,SEXP fill,SEXP align,SEXP rho)233 SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
234   int protecti = 0;
235   const bool verbose = GetVerbose();
236 
237   if (!isFunction(fun))
238     error(_("internal error: 'fun' must be a function")); // # nocov
239   if (!isEnvironment(rho))
240     error(_("internal error: 'rho' should be an environment")); // # nocov
241 
242   if (!xlength(obj))
243     return(obj);
244   double tic = 0;
245   if (verbose)
246     tic = omp_get_wtime();
247   SEXP x = PROTECT(coerceToRealListR(obj)); protecti++;
248   R_len_t nx = length(x);
249 
250   if (!isInteger(k)) {
251     if (isReal(k)) {
252       if (isRealReallyInt(k)) {
253         SEXP ik = PROTECT(coerceVector(k, INTSXP)); protecti++;
254         k = ik;
255       } else {
256         error(_("n must be integer"));
257       }
258     } else {
259       error(_("n must be integer"));
260     }
261   }
262   R_len_t nk = length(k);
263   if (nk == 0)
264     error(_("n must be non 0 length"));
265   int *ik = INTEGER(k);
266 
267   int ialign;
268   if (!strcmp(CHAR(STRING_ELT(align, 0)), "right")) {
269     ialign = 1;
270   } else if (!strcmp(CHAR(STRING_ELT(align, 0)), "center")) {
271     ialign = 0;
272   } else if (!strcmp(CHAR(STRING_ELT(align, 0)), "left")) {
273     ialign = -1;
274   } else {
275     error(_("Internal error: invalid align argument in rolling function, should have been caught before. please report to data.table issue tracker.")); // # nocov
276   }
277 
278   if (length(fill) != 1)
279     error(_("fill must be a vector of length 1"));
280   double dfill;
281   if (isInteger(fill)) {
282     if (INTEGER(fill)[0]==NA_LOGICAL) {
283       dfill = NA_REAL;
284     } else {
285       dfill = (double)INTEGER(fill)[0];
286     }
287   } else if (isReal(fill)) {
288     dfill = REAL(fill)[0];
289   } else if (isLogical(fill) && LOGICAL(fill)[0]==NA_LOGICAL){
290     dfill = NA_REAL;
291   } else {
292     error(_("fill must be numeric"));
293   }
294 
295   SEXP ans = PROTECT(allocVector(VECSXP, nk * nx)); protecti++;
296   if (verbose)
297     Rprintf(_("%s: allocating memory for results %dx%d\n"), __func__, nx, nk);
298   ans_t *dans = (ans_t *)R_alloc(nx*nk, sizeof(ans_t));
299   double** dx = (double**)R_alloc(nx, sizeof(double*));
300   uint64_t* inx = (uint64_t*)R_alloc(nx, sizeof(uint64_t));
301   for (R_len_t i=0; i<nx; i++) {
302     inx[i] = xlength(VECTOR_ELT(x, i));
303     for (R_len_t j=0; j<nk; j++) {
304       SET_VECTOR_ELT(ans, i*nk+j, allocVector(REALSXP, inx[i]));
305       dans[i*nk+j] = ((ans_t) { .dbl_v=REAL(VECTOR_ELT(ans, i*nk+j)), .status=0, .message={"\0","\0","\0","\0"} });
306     }
307     dx[i] = REAL(VECTOR_ELT(x, i));
308   }
309 
310   double* dw;
311   SEXP pw, pc;
312 
313   // in the outer loop we handle vectorized k argument
314   // for each k we need to allocate a width window object: pw
315   // we also need to construct distinct R call pointing to that window
316   for (R_len_t j=0; j<nk; j++) {
317     pw = PROTECT(allocVector(REALSXP, ik[j]));
318     dw = REAL(pw);
319     pc = PROTECT(LCONS(fun, LCONS(pw, LCONS(R_DotsSymbol, R_NilValue))));
320 
321     for (R_len_t i=0; i<nx; i++) {
322       frollapply(dx[i], inx[i], dw, ik[j], &dans[i*nk+j], ialign, dfill, pc, rho, verbose);
323     }
324 
325     UNPROTECT(2);
326   }
327 
328   if (verbose)
329     Rprintf(_("%s: processing of %d column(s) and %d window(s) took %.3fs\n"), __func__, nx, nk, omp_get_wtime()-tic);
330 
331   UNPROTECT(protecti);
332   return isVectorAtomic(obj) && length(ans) == 1 ? VECTOR_ELT(ans, 0) : ans;
333 }
334