1 #include "data.table.h"
2 //#include <time.h>
3
4 static int ngrp = 0; // number of groups
5 static int *grpsize = NULL; // size of each group, used by gmean (and gmedian) not gsum
6 static int nrow = 0; // length of underlying x; same as length(ghigh) and length(glow)
7 static int *irows; // GForce support for subsets in 'i' (TODO: joins in 'i')
8 static int irowslen = -1; // -1 is for irows = NULL
9 static uint16_t *high=NULL, *low=NULL; // the group of each x item; a.k.a. which-group-am-I
10 static int *restrict grp; // TODO: eventually this can be made local for gforce as won't be needed globally when all functions here use gather
11 static size_t highSize;
12 static int shift, mask;
13 static char *gx=NULL;
14
15 static size_t nBatch, batchSize, lastBatchSize;
16 static int *counts, *tmpcounts;
17
18 // for gmedian
19 static int maxgrpn = 0;
20 static int *oo = NULL;
21 static int *ff = NULL;
22 static int isunsorted = 0;
23
24 // from R's src/cov.c (for variance / sd)
25 #ifdef HAVE_LONG_DOUBLE
26 # define SQRTL sqrtl
27 #else
28 # define SQRTL sqrt
29 #endif
30
nbit(int n)31 static int nbit(int n)
32 {
33 // returns position of biggest bit; i.e. floor(log2(n))+1 without using fpa
34 // not needed to be fast. Just a helper function.
35 int nb=0;
36 while (n) { nb++; n>>=1; }
37 return nb;
38 }
39
gforce(SEXP env,SEXP jsub,SEXP o,SEXP f,SEXP l,SEXP irowsArg)40 SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) {
41 double started = wallclock();
42 const bool verbose = GetVerbose();
43 if (TYPEOF(env) != ENVSXP) error(_("env is not an environment"));
44 // The type of jsub is pretty flexbile in R, so leave checking to eval() below.
45 if (!isInteger(o)) error(_("%s is not an integer vector"), "o");
46 if (!isInteger(f)) error(_("%s is not an integer vector"), "f");
47 if (!isInteger(l)) error(_("%s is not an integer vector"), "l");
48 if (isNull(irowsArg)) {
49 irows = NULL;
50 irowslen = -1;
51 }
52 else if (isInteger(irowsArg)) {
53 irows = INTEGER(irowsArg);
54 irowslen = LENGTH(irowsArg);
55 }
56 else error(_("irowsArg is neither an integer vector nor NULL")); // # nocov
57 ngrp = LENGTH(l);
58 if (LENGTH(f) != ngrp) error(_("length(f)=%d != length(l)=%d"), LENGTH(f), ngrp);
59 nrow=0;
60 grpsize = INTEGER(l);
61 maxgrpn = 0;
62 for (int i=0; i<ngrp; i++) {
63 nrow+=grpsize[i];
64 if (grpsize[i]>maxgrpn) maxgrpn = grpsize[i]; // old comment to be checked: 'needed for #2046 and #2111 when maxgrpn attribute is not attached to empty o'
65 }
66 if (LENGTH(o) && LENGTH(o)!=nrow) error(_("o has length %d but sum(l)=%d"), LENGTH(o), nrow);
67 {
68 SEXP tt = getAttrib(o, install("maxgrpn"));
69 if (length(tt)==1 && INTEGER(tt)[0]!=maxgrpn) error(_("Internal error: o's maxgrpn attribute mismatches recalculated maxgrpn")); // # nocov
70 }
71
72 int nb = nbit(ngrp-1);
73 shift = nb/2; // /2 so that high and low can be uint16_t, and no limit (even for nb=4) to stress-test.
74 // shift=MAX(nb-8,0); if (shift>16) shift=nb/2; // TODO: when we have stress-test off mode, do this
75 mask = (1<<shift)-1;
76 highSize = ((ngrp-1)>>shift) + 1;
77
78 grp = (int *)R_alloc(nrow, sizeof(int)); // TODO: use malloc and made this local as not needed globally when all functions here use gather
79 // maybe better to malloc to avoid R's heap. This grp isn't global, so it doesn't need to be R_alloc
80 const int *restrict fp = INTEGER(f);
81
82 nBatch = MIN((nrow+1)/2, getDTthreads(nrow, true)*2); // *2 to reduce last-thread-home. TODO: experiment. The higher this is though, the bigger is counts[]
83 batchSize = MAX(1, (nrow-1)/nBatch);
84 lastBatchSize = nrow - (nBatch-1)*batchSize;
85 // We deliberate use, for example, 40 batches of just 14 rows, to stress-test tests. This strategy proved to be a good one as #3204 immediately came to light.
86 // TODO: enable stress-test mode in tests only (#3205) which can be turned off by default in release to decrease overhead on small data
87 // if that is established to be biting (it may be fine).
88 if (nBatch<1 || batchSize<1 || lastBatchSize<1) {
89 error(_("Internal error: nrow=%d ngrp=%d nbit=%d shift=%d highSize=%d nBatch=%d batchSize=%d lastBatchSize=%d\n"), // # nocov
90 nrow, ngrp, nb, shift, highSize, nBatch, batchSize, lastBatchSize); // # nocov
91 }
92 // initial population of g:
93 #pragma omp parallel for num_threads(getDTthreads(ngrp, false))
94 for (int g=0; g<ngrp; g++) {
95 int *elem = grp + fp[g]-1;
96 for (int j=0; j<grpsize[g]; j++) elem[j] = g;
97 }
98 if (verbose) { Rprintf(_("gforce initial population of grp took %.3f\n"), wallclock()-started); started=wallclock(); }
99 isunsorted = 0;
100 if (LENGTH(o)) {
101 isunsorted = 1; // for gmedian
102
103 // What follows is more cache-efficient version of this scattered assign :
104 // for (int g=0; g<ngrp; g++) {
105 // const int *elem = op + fp[g]-1;
106 // for (int j=0; j<grpsize[g]; j++) grp[ elem[j]-1 ] = g;
107 //}
108
109 const int *restrict op = INTEGER(o); // o is a permutation of 1:nrow
110 int nb = nbit(nrow-1);
111 int shift = MAX(nb-8, 0); // TODO: experiment nb/2. Here it doesn't have to be /2 currently.
112 int highSize = ((nrow-1)>>shift) + 1;
113 //Rprintf(_("When assigning grp[o] = g, highSize=%d nb=%d shift=%d nBatch=%d\n"), highSize, nb, shift, nBatch);
114 int *counts = calloc(nBatch*highSize, sizeof(int)); // TODO: cache-line align and make highSize a multiple of 64
115 int *TMP = malloc(nrow*2l*sizeof(int)); // must multiple the long int otherwise overflow may happen, #4295
116 if (!counts || !TMP ) error(_("Internal error: Failed to allocate counts or TMP when assigning g in gforce"));
117 #pragma omp parallel for num_threads(getDTthreads(nBatch, false)) // schedule(dynamic,1)
118 for (int b=0; b<nBatch; b++) {
119 const int howMany = b==nBatch-1 ? lastBatchSize : batchSize;
120 const int *my_o = op + b*batchSize;
121 int *restrict my_counts = counts + b*highSize;
122 for (int i=0; i<howMany; i++) {
123 const int w = (my_o[i]-1) >> shift;
124 my_counts[w]++;
125 }
126 for (int i=0, cum=0; i<highSize; i++) {
127 int tmp = my_counts[i];
128 my_counts[i] = cum;
129 cum += tmp;
130 }
131 const int *restrict my_g = grp + b*batchSize;
132 int *restrict my_tmp = TMP + b*2*batchSize;
133 for (int i=0; i<howMany; i++) {
134 const int w = (my_o[i]-1) >> shift; // could use my_high but may as well use my_pg since we need my_pg anyway for the lower bits next too
135 int *p = my_tmp + 2*my_counts[w]++;
136 *p++ = my_o[i]-1;
137 *p = my_g[i];
138 }
139 }
140 //Rprintf(_("gforce assign TMP (o,g) pairs took %.3f\n"), wallclock()-started); started=wallclock();
141 #pragma omp parallel for num_threads(getDTthreads(highSize, false))
142 for (int h=0; h<highSize; h++) { // very important that high is first loop here
143 for (int b=0; b<nBatch; b++) {
144 const int start = h==0 ? 0 : counts[ b*highSize + h - 1 ];
145 const int end = counts[ b*highSize + h ];
146 const int *restrict p = TMP + b*2*batchSize + start*2;
147 for (int k=start; k<end; k++, p+=2) {
148 grp[p[0]] = p[1]; // TODO: could write high here, and initial low. ** If so, same in initial population when o is missing **
149 }
150 }
151 }
152 free(counts);
153 free(TMP);
154 //Rprintf(_("gforce assign TMP [ (o,g) pairs ] back to grp took %.3f\n"), wallclock()-started); started=wallclock();
155 }
156
157 high = (uint16_t *)R_alloc(nrow, sizeof(uint16_t)); // maybe better to malloc to avoid R's heap, but safer to R_alloc since it's done via eval()
158 low = (uint16_t *)R_alloc(nrow, sizeof(uint16_t));
159 // global ghigh and glow because the g* functions (inside jsub) share this common memory
160
161 gx = (char *)R_alloc(nrow, sizeof(Rcomplex)); // enough for a copy of one column (or length(irows) if supplied)
162 // TODO: reduce to the largest type present; won't be faster (untouched RAM won't be fetched) but it will increase the largest size that works.
163
164 counts = (int *)S_alloc(nBatch*highSize, sizeof(int)); // (S_ zeros) TODO: cache-line align and make highSize a multiple of 64
165 tmpcounts = (int *)R_alloc(getDTthreads(nBatch, false)*highSize, sizeof(int));
166
167 const int *restrict gp = grp;
168 #pragma omp parallel for num_threads(getDTthreads(nBatch, false)) // schedule(dynamic,1)
169 for (int b=0; b<nBatch; b++) {
170 int *restrict my_counts = counts + b*highSize;
171 uint16_t *restrict my_high = high + b*batchSize;
172 const int *my_pg = gp + b*batchSize;
173 const int howMany = b==nBatch-1 ? lastBatchSize : batchSize;
174 for (int i=0; i<howMany; i++) {
175 const int w = my_pg[i] >> shift;
176 my_counts[w]++;
177 my_high[i] = (uint16_t)w; // reduce 4 bytes to 2
178 }
179 for (int i=0, cum=0; i<highSize; i++) {
180 int tmp = my_counts[i];
181 my_counts[i] = cum;
182 cum += tmp;
183 }
184 uint16_t *restrict my_low = low + b*batchSize;
185 int *restrict my_tmpcounts = tmpcounts + omp_get_thread_num()*highSize;
186 memcpy(my_tmpcounts, my_counts, highSize*sizeof(int));
187 for (int i=0; i<howMany; i++) {
188 const int w = my_pg[i] >> shift; // could use my_high but may as well use my_pg since we need my_pg anyway for the lower bits next too
189 my_low[my_tmpcounts[w]++] = (uint16_t)(my_pg[i] & mask);
190 }
191 // counts is now cumulated within batch (with ending values) and we leave it that way
192 // memcpy(counts + b*256, myCounts, 256*sizeof(int)); // save cumulate for later, first bucket contains position of next. For ease later in the very last batch.
193 }
194 if (verbose) { Rprintf(_("gforce assign high and low took %.3f\n"), wallclock()-started); started=wallclock(); }
195
196 oo = INTEGER(o);
197 ff = INTEGER(f);
198
199 SEXP ans = PROTECT( eval(jsub, env) );
200 if (verbose) { Rprintf(_("gforce eval took %.3f\n"), wallclock()-started); started=wallclock(); }
201 // if this eval() fails with R error, R will release grp for us. Which is why we use R_alloc above.
202 if (isVectorAtomic(ans)) {
203 SEXP tt = PROTECT(allocVector(VECSXP, 1));
204 SET_VECTOR_ELT(tt, 0, ans);
205 UNPROTECT(2);
206 return tt;
207 }
208 UNPROTECT(1);
209 return ans;
210 }
211
gather(SEXP x,bool * anyNA)212 void *gather(SEXP x, bool *anyNA)
213 {
214 double started=wallclock();
215 const bool verbose = GetVerbose();
216 if (verbose) Rprintf(_("gather took ... "));
217 switch (TYPEOF(x)) {
218 case LGLSXP: case INTSXP: {
219 const int *restrict thisx = INTEGER(x);
220 #pragma omp parallel for num_threads(getDTthreads(nBatch, false))
221 for (int b=0; b<nBatch; b++) {
222 int *restrict my_tmpcounts = tmpcounts + omp_get_thread_num()*highSize;
223 memcpy(my_tmpcounts, counts + b*highSize, highSize*sizeof(int)); // original cumulated // already cumulated for this batch
224 int *restrict my_gx = (int *)gx + b*batchSize;
225 const uint16_t *my_high = high + b*batchSize;
226 const int howMany = b==nBatch-1 ? lastBatchSize : batchSize;
227 bool my_anyNA = false;
228 if (irowslen==-1) {
229 const int *my_x = thisx + b*batchSize;
230 for (int i=0; i<howMany; i++) {
231 const int elem = my_x[i];
232 my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
233 if (elem==NA_INTEGER) my_anyNA = true;
234 }
235 } else {
236 const int *my_x = irows + b*batchSize;
237 for (int i=0; i<howMany; i++) {
238 int elem = thisx[ my_x[i]-1 ];
239 my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
240 if (elem==NA_INTEGER) my_anyNA = true;
241 }
242 }
243 if (my_anyNA) *anyNA = true; // naked write ok since just bool and always writing true; and no performance issue as maximum nBatch writes
244 }
245 } break;
246 case REALSXP: {
247 if (!INHERITS(x, char_integer64)) {
248 const double *restrict thisx = REAL(x);
249 #pragma omp parallel for num_threads(getDTthreads(nBatch, false))
250 for (int b=0; b<nBatch; b++) {
251 int *restrict my_tmpcounts = tmpcounts + omp_get_thread_num()*highSize;
252 memcpy(my_tmpcounts, counts + b*highSize, highSize*sizeof(int));
253 double *restrict my_gx = (double *)gx + b*batchSize;
254 const uint16_t *my_high = high + b*batchSize;
255 const int howMany = b==nBatch-1 ? lastBatchSize : batchSize;
256 bool my_anyNA = false;
257 if (irowslen==-1) {
258 const double *my_x = thisx + b*batchSize;
259 for (int i=0; i<howMany; i++) {
260 const double elem = my_x[i];
261 my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
262 if (ISNAN(elem)) my_anyNA = true; // R's ISNAN includes NA; i.e. defined as C isnan with some platform specific differences (perhaps historic)
263 }
264 } else {
265 const int *my_x = irows + b*batchSize;
266 for (int i=0; i<howMany; i++) {
267 double elem = thisx[ my_x[i]-1 ];
268 my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
269 if (ISNAN(elem)) my_anyNA = true;
270 }
271 }
272 if (my_anyNA) *anyNA = true;
273 }
274 } else {
275 const int64_t *restrict thisx = (int64_t *)REAL(x);
276 #pragma omp parallel for num_threads(getDTthreads(nBatch, false))
277 for (int b=0; b<nBatch; b++) {
278 int *restrict my_tmpcounts = tmpcounts + omp_get_thread_num()*highSize;
279 memcpy(my_tmpcounts, counts + b*highSize, highSize*sizeof(int));
280 int64_t *restrict my_gx = (int64_t *)gx + b*batchSize;
281 const uint16_t *my_high = high + b*batchSize;
282 const int howMany = b==nBatch-1 ? lastBatchSize : batchSize;
283 bool my_anyNA = false;
284 if (irowslen==-1) {
285 const int64_t *my_x = thisx + b*batchSize;
286 for (int i=0; i<howMany; i++) {
287 const int64_t elem = my_x[i];
288 my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
289 if (elem==INT64_MIN) my_anyNA = true;
290 }
291 } else {
292 const int *my_x = irows + b*batchSize;
293 for (int i=0; i<howMany; i++) {
294 int64_t elem = thisx[ my_x[i]-1 ];
295 my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
296 if (elem==INT64_MIN) my_anyNA = true;
297 }
298 }
299 if (my_anyNA) *anyNA = true;
300 }
301 }
302 } break;
303 case CPLXSXP: {
304 const Rcomplex *restrict thisx = COMPLEX(x);
305 #pragma omp parallel for num_threads(getDTthreads(nBatch, false))
306 for (int b=0; b<nBatch; b++) {
307 int *restrict my_tmpcounts = tmpcounts + omp_get_thread_num()*highSize;
308 memcpy(my_tmpcounts, counts + b*highSize, highSize*sizeof(int));
309 Rcomplex *restrict my_gx = (Rcomplex *)gx + b*batchSize;
310 const uint16_t *my_high = high + b*batchSize;
311 const int howMany = b==nBatch-1 ? lastBatchSize : batchSize;
312 bool my_anyNA = false;
313 if (irowslen==-1) {
314 const Rcomplex *my_x = thisx + b*batchSize;
315 for (int i=0; i<howMany; i++) {
316 const Rcomplex elem = my_x[i];
317 my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
318 // typically just checking one component would be enough,
319 // but ?complex suggests there may be some edge cases; better to be safe
320 if (ISNAN(elem.r) && ISNAN(elem.i)) my_anyNA = true;
321 }
322 } else {
323 const int *my_x = irows + b*batchSize;
324 for (int i=0; i<howMany; i++) {
325 Rcomplex elem = thisx[ my_x[i]-1 ];
326 my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
327 if (ISNAN(elem.r) && ISNAN(elem.i)) my_anyNA = true;
328 }
329 }
330 if (my_anyNA) *anyNA = true; // naked write ok since just bool and always writing true; and no performance issue as maximum nBatch writes
331 }
332 } break;
333 default :
334 error(_("gather implemented for INTSXP, REALSXP, and CPLXSXP but not '%s'"), type2char(TYPEOF(x))); // # nocov
335 }
336 if (verbose) { Rprintf(_("%.3fs\n"), wallclock()-started); }
337 return gx;
338 }
339
gsum(SEXP x,SEXP narmArg,SEXP warnOverflowArg)340 SEXP gsum(SEXP x, SEXP narmArg, SEXP warnOverflowArg)
341 {
342 if (!isLogical(narmArg) || LENGTH(narmArg)!=1 || LOGICAL(narmArg)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
343 const bool narm = LOGICAL(narmArg)[0];
344 const bool warnOverflow = LOGICAL(warnOverflowArg)[0];
345 if (inherits(x, "factor")) error(_("sum is not meaningful for factors."));
346 const int n = (irowslen == -1) ? length(x) : irowslen;
347 double started = wallclock();
348 const bool verbose=GetVerbose();
349 if (verbose) Rprintf(_("This gsum took (narm=%s) ... "), narm?"TRUE":"FALSE");
350 if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gsum");
351 bool anyNA=false;
352 SEXP ans;
353 switch(TYPEOF(x)) {
354 case LGLSXP: case INTSXP: {
355 const int *restrict gx = gather(x, &anyNA);
356 ans = PROTECT(allocVector(INTSXP, ngrp));
357 int *restrict ansp = INTEGER(ans);
358 memset(ansp, 0, ngrp*sizeof(int));
359 bool overflow=false;
360 //double started = wallclock();
361 if (!anyNA) {
362 #pragma omp parallel for num_threads(getDTthreads(highSize, false)) //schedule(dynamic,1)
363 for (int h=0; h<highSize; h++) { // very important that high is first loop here
364 int *restrict _ans = ansp + (h<<shift);
365 for (int b=0; b<nBatch; b++) {
366 const int pos = counts[ b*highSize + h ];
367 const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
368 const int *my_gx = gx + b*batchSize + pos;
369 const uint16_t *my_low = low + b*batchSize + pos;
370 for (int i=0; i<howMany; i++) {
371 const int a = _ans[my_low[i]];
372 const int b = my_gx[i];
373 if ((a>0 && b>INT_MAX-a) || (a<0 && b<NA_INTEGER+1-a)) overflow=true;
374 else _ans[my_low[i]] += b; // naked by design; each thread does all of each h for all batches
375 }
376 }
377 }
378 } else {
379 #pragma omp parallel for num_threads(getDTthreads(highSize, false))
380 for (int h=0; h<highSize; h++) {
381 int *restrict _ans = ansp + (h<<shift);
382 for (int b=0; b<nBatch; b++) {
383 const int pos = counts[ b*highSize + h ];
384 const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
385 const int *my_gx = gx + b*batchSize + pos;
386 const uint16_t *my_low = low + b*batchSize + pos;
387 for (int i=0; i<howMany; i++) {
388 const int a = _ans[my_low[i]];
389 if (a==NA_INTEGER) continue;
390 const int b = my_gx[i];
391 if (b==NA_INTEGER) {
392 if (!narm) _ans[my_low[i]]=NA_INTEGER;
393 continue;
394 }
395 if ((a>0 && b>INT_MAX-a) || (a<0 && b<NA_INTEGER+1-a)) overflow=true;
396 else _ans[my_low[i]] += b;
397 }
398 }
399 }
400 }
401 //Rprintf(_("gsum int took %.3f\n"), wallclock()-started);
402 if (overflow) {
403 UNPROTECT(1); // discard the result with overflow
404 if (warnOverflow) warning(_("The sum of an integer column for a group was more than type 'integer' can hold so the result has been coerced to 'numeric' automatically for convenience."));
405 ans = PROTECT(allocVector(REALSXP, ngrp));
406 double *restrict ansp = REAL(ans);
407 memset(ansp, 0, ngrp*sizeof(double));
408 #pragma omp parallel for num_threads(getDTthreads(highSize, false))
409 for (int h=0; h<highSize; h++) {
410 double *restrict _ans = ansp + (h<<shift);
411 for (int b=0; b<nBatch; b++) {
412 const int pos = counts[ b*highSize + h ];
413 const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
414 const int *my_gx = gx + b*batchSize + pos;
415 const uint16_t *my_low = low + b*batchSize + pos;
416 // rare and slower so no need to switch on anyNA
417 for (int i=0; i<howMany; i++) {
418 const int elem = my_gx[i];
419 if (elem==NA_INTEGER) {
420 if (!narm) _ans[my_low[i]]=NA_REAL;
421 continue;
422 }
423 _ans[my_low[i]] += elem; // let NA_REAL propagate
424 }
425 }
426 }
427 }
428 } break;
429 case REALSXP: {
430 if (!INHERITS(x, char_integer64)) {
431 const double *restrict gx = gather(x, &anyNA);
432 ans = PROTECT(allocVector(REALSXP, ngrp));
433 double *restrict ansp = REAL(ans);
434 memset(ansp, 0, ngrp*sizeof(double));
435 if (!narm || !anyNA) {
436 #pragma omp parallel for num_threads(getDTthreads(highSize, false))
437 for (int h=0; h<highSize; h++) {
438 double *restrict _ans = ansp + (h<<shift);
439 for (int b=0; b<nBatch; b++) {
440 const int pos = counts[ b*highSize + h ];
441 const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
442 const double *my_gx = gx + b*batchSize + pos;
443 const uint16_t *my_low = low + b*batchSize + pos;
444 for (int i=0; i<howMany; i++) {
445 _ans[my_low[i]] += my_gx[i]; // let NA propagate when !narm
446 }
447 }
448 }
449 } else {
450 // narm==true and anyNA==true
451 #pragma omp parallel for num_threads(getDTthreads(highSize, false))
452 for (int h=0; h<highSize; h++) {
453 double *restrict _ans = ansp + (h<<shift);
454 for (int b=0; b<nBatch; b++) {
455 const int pos = counts[ b*highSize + h ];
456 const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
457 const double *my_gx = gx + b*batchSize + pos;
458 const uint16_t *my_low = low + b*batchSize + pos;
459 for (int i=0; i<howMany; i++) {
460 const double elem = my_gx[i];
461 if (!ISNAN(elem)) _ans[my_low[i]] += elem;
462 }
463 }
464 }
465 }
466 } else { // int64
467 const int64_t *restrict gx = gather(x, &anyNA);
468 ans = PROTECT(allocVector(REALSXP, ngrp));
469 int64_t *restrict ansp = (int64_t *)REAL(ans);
470 memset(ansp, 0, ngrp*sizeof(int64_t));
471 if (!anyNA) {
472 #pragma omp parallel for num_threads(getDTthreads(highSize, false))
473 for (int h=0; h<highSize; h++) {
474 int64_t *restrict _ans = ansp + (h<<shift);
475 for (int b=0; b<nBatch; b++) {
476 const int pos = counts[ b*highSize + h ];
477 const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
478 const int64_t *my_gx = gx + b*batchSize + pos;
479 const uint16_t *my_low = low + b*batchSize + pos;
480 for (int i=0; i<howMany; i++) {
481 _ans[my_low[i]] += my_gx[i]; // does not propagate INT64 for !narm
482 }
483 }
484 }
485 } else { // narm==true/false and anyNA==true
486 if (!narm) {
487 #pragma omp parallel for num_threads(getDTthreads(highSize, false))
488 for (int h=0; h<highSize; h++) {
489 int64_t *restrict _ans = ansp + (h<<shift);
490 for (int b=0; b<nBatch; b++) {
491 const int pos = counts[ b*highSize + h ];
492 const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
493 const int64_t *my_gx = gx + b*batchSize + pos;
494 const uint16_t *my_low = low + b*batchSize + pos;
495 for (int i=0; i<howMany; i++) {
496 const int64_t elem = my_gx[i];
497 if (elem!=INT64_MIN) {
498 _ans[my_low[i]] += elem;
499 } else {
500 _ans[my_low[i]] = INT64_MIN;
501 break;
502 }
503 }
504 }
505 }
506 } else {
507 #pragma omp parallel for num_threads(getDTthreads(highSize, false))
508 for (int h=0; h<highSize; h++) {
509 int64_t *restrict _ans = ansp + (h<<shift);
510 for (int b=0; b<nBatch; b++) {
511 const int pos = counts[ b*highSize + h ];
512 const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
513 const int64_t *my_gx = gx + b*batchSize + pos;
514 const uint16_t *my_low = low + b*batchSize + pos;
515 for (int i=0; i<howMany; i++) {
516 const int64_t elem = my_gx[i];
517 if (elem!=INT64_MIN) _ans[my_low[i]] += elem;
518 }
519 }
520 }
521 }
522 }
523 }
524 } break;
525 case CPLXSXP: {
526 const Rcomplex *restrict gx = gather(x, &anyNA);
527 ans = PROTECT(allocVector(CPLXSXP, ngrp));
528 Rcomplex *restrict ansp = COMPLEX(ans);
529 memset(ansp, 0, ngrp*sizeof(Rcomplex));
530 if (!narm || !anyNA) {
531 #pragma omp parallel for num_threads(getDTthreads(highSize, false))
532 for (int h=0; h<highSize; h++) {
533 Rcomplex *restrict _ans = ansp + (h<<shift);
534 for (int b=0; b<nBatch; b++) {
535 const int pos = counts[ b*highSize + h ];
536 const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
537 const Rcomplex *my_gx = gx + b*batchSize + pos;
538 const uint16_t *my_low = low + b*batchSize + pos;
539 for (int i=0; i<howMany; i++) {
540 _ans[my_low[i]].r += my_gx[i].r; // let NA propagate when !narm
541 _ans[my_low[i]].i += my_gx[i].i;
542 }
543 }
544 }
545 } else {
546 // narm==true and anyNA==true
547 #pragma omp parallel for num_threads(getDTthreads(highSize, false))
548 for (int h=0; h<highSize; h++) {
549 Rcomplex *restrict _ans = ansp + (h<<shift);
550 for (int b=0; b<nBatch; b++) {
551 const int pos = counts[ b*highSize + h ];
552 const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
553 const Rcomplex *my_gx = gx + b*batchSize + pos;
554 const uint16_t *my_low = low + b*batchSize + pos;
555 for (int i=0; i<howMany; i++) {
556 const Rcomplex elem = my_gx[i];
557 if (!ISNAN(elem.r)) _ans[my_low[i]].r += elem.r;
558 if (!ISNAN(elem.i)) _ans[my_low[i]].i += elem.i;
559 }
560 }
561 }
562 }
563 } break;
564 default:
565 error(_("Type '%s' not supported by GForce sum (gsum). Either add the prefix base::sum(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
566 }
567 copyMostAttrib(x, ans);
568 if (verbose) { Rprintf(_("%.3fs\n"), wallclock()-started); }
569 UNPROTECT(1);
570 return(ans);
571 }
572
gmean(SEXP x,SEXP narm)573 SEXP gmean(SEXP x, SEXP narm)
574 {
575 SEXP ans=R_NilValue;
576 //clock_t start = clock();
577 if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
578 if (!isVectorAtomic(x)) error(_("GForce mean can only be applied to columns, not .SD or similar. Likely you're looking for 'DT[,lapply(.SD,mean),by=,.SDcols=]'. See ?data.table."));
579 if (inherits(x, "factor")) error(_("mean is not meaningful for factors."));
580 if (!LOGICAL(narm)[0]) {
581 int protecti=0;
582 ans = PROTECT(gsum(x, narm, /*#986, warnOverflow=*/ScalarLogical(FALSE))); protecti++;
583 switch(TYPEOF(ans)) {
584 case LGLSXP: case INTSXP:
585 ans = PROTECT(coerceVector(ans, REALSXP)); protecti++;
586 case REALSXP: {
587 double *xd = REAL(ans);
588 for (int i=0; i<ngrp; i++) *xd++ /= grpsize[i]; // let NA propogate
589 } break;
590 case CPLXSXP: {
591 Rcomplex *xd = COMPLEX(ans);
592 for (int i=0; i<ngrp; i++) {
593 xd->i /= grpsize[i];
594 xd->r /= grpsize[i];
595 xd++;
596 }
597 } break;
598 default :
599 error(_("Internal error: gsum returned type '%s'. typeof(x) is '%s'"), type2char(TYPEOF(ans)), type2char(TYPEOF(x))); // # nocov
600 }
601 UNPROTECT(protecti);
602 return(ans);
603 }
604 // na.rm=TRUE. Similar to gsum, but we need to count the non-NA as well for the divisor
605 const int n = (irowslen == -1) ? length(x) : irowslen;
606 if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gsum");
607
608 long double *s = calloc(ngrp, sizeof(long double)), *si=NULL; // s = sum; si = sum imaginary just for complex
609 if (!s) error(_("Unable to allocate %d * %d bytes for sum in gmean na.rm=TRUE"), ngrp, sizeof(long double));
610
611 int *c = calloc(ngrp, sizeof(int));
612 if (!c) error(_("Unable to allocate %d * %d bytes for counts in gmean na.rm=TRUE"), ngrp, sizeof(int));
613
614 switch(TYPEOF(x)) {
615 case LGLSXP: case INTSXP: {
616 const int *xd = INTEGER(x);
617 for (int i=0; i<n; i++) {
618 int thisgrp = grp[i];
619 int ix = (irowslen == -1) ? i : irows[i]-1;
620 if (xd[ix] == NA_INTEGER) continue;
621 s[thisgrp] += xd[ix]; // no under/overflow here, s is long double
622 c[thisgrp]++;
623 }
624 } break;
625 case REALSXP: {
626 const double *xd = REAL(x);
627 for (int i=0; i<n; i++) {
628 int thisgrp = grp[i];
629 int ix = (irowslen == -1) ? i : irows[i]-1;
630 if (ISNAN(xd[ix])) continue;
631 s[thisgrp] += xd[ix];
632 c[thisgrp]++;
633 }
634 } break;
635 case CPLXSXP: {
636 const Rcomplex *xd = COMPLEX(x);
637 si = calloc(ngrp, sizeof(long double));
638 if (!si) error(_("Unable to allocate %d * %d bytes for si in gmean na.rm=TRUE"), ngrp, sizeof(long double));
639 for (int i=0; i<n; i++) {
640 int thisgrp = grp[i];
641 int ix = (irowslen == -1) ? i : irows[i]-1;
642 if (ISNAN(xd[ix].r) || ISNAN(xd[ix].i)) continue; // || otherwise we'll need two counts in two c's too?
643 s[thisgrp] += xd[ix].r;
644 si[thisgrp] += xd[ix].i;
645 c[thisgrp]++;
646 }
647 } break;
648 default:
649 free(s); free(c); // # nocov because it already stops at gsum, remove nocov if gmean will support a type that gsum wont
650 error(_("Type '%s' not supported by GForce mean (gmean) na.rm=TRUE. Either add the prefix base::mean(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x))); // # nocov
651 }
652 switch(TYPEOF(x)) {
653 case LGLSXP: case INTSXP: case REALSXP: {
654 ans = PROTECT(allocVector(REALSXP, ngrp));
655 double *ansd = REAL(ans);
656 for (int i=0; i<ngrp; i++) {
657 if (c[i]==0) { ansd[i] = R_NaN; continue; } // NaN to follow base::mean
658 s[i] /= c[i];
659 ansd[i] = s[i]>DBL_MAX ? R_PosInf : (s[i] < -DBL_MAX ? R_NegInf : (double)s[i]);
660 }
661 } break;
662 case CPLXSXP: {
663 ans = PROTECT(allocVector(CPLXSXP, ngrp));
664 Rcomplex *ansd = COMPLEX(ans);
665 for (int i=0; i<ngrp; i++) {
666 if (c[i]==0) { ansd[i].r = R_NaN; ansd[i].i = R_NaN; continue; }
667 s[i] /= c[i];
668 si[i] /= c[i];
669 ansd[i].r = s[i] >DBL_MAX ? R_PosInf : (s[i] < -DBL_MAX ? R_NegInf : (double)s[i]);
670 ansd[i].i = si[i]>DBL_MAX ? R_PosInf : (si[i]< -DBL_MAX ? R_NegInf : (double)si[i]);
671 }
672 } break;
673 default:
674 error(_("Internal error: unsupported type at the end of gmean")); // # nocov
675 }
676 free(s); free(si); free(c);
677 copyMostAttrib(x, ans);
678 // Rprintf(_("this gmean na.rm=TRUE took %8.3f\n"), 1.0*(clock()-start)/CLOCKS_PER_SEC);
679 UNPROTECT(1);
680 return(ans);
681 }
682
683 // gmin
gmin(SEXP x,SEXP narm)684 SEXP gmin(SEXP x, SEXP narm)
685 {
686 if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
687 if (!isVectorAtomic(x)) error(_("GForce min can only be applied to columns, not .SD or similar. To find min of all items in a list such as .SD, either add the prefix base::min(.SD) or turn off GForce optimization using options(datatable.optimize=1). More likely, you may be looking for 'DT[,lapply(.SD,min),by=,.SDcols=]'"));
688 if (inherits(x, "factor") && !inherits(x, "ordered")) error(_("min is not meaningful for factors."));
689 R_len_t i, ix, thisgrp=0;
690 int n = (irowslen == -1) ? length(x) : irowslen;
691 //clock_t start = clock();
692 SEXP ans;
693 if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmin");
694 int protecti=0;
695 switch(TYPEOF(x)) {
696 case LGLSXP: case INTSXP:
697 ans = PROTECT(allocVector(INTSXP, ngrp)); protecti++;
698 if (!LOGICAL(narm)[0]) {
699 for (i=0; i<ngrp; i++) INTEGER(ans)[i] = INT_MAX;
700 for (i=0; i<n; i++) {
701 thisgrp = grp[i];
702 ix = (irowslen == -1) ? i : irows[i]-1;
703 if (INTEGER(x)[ix] < INTEGER(ans)[thisgrp]) // NA_INTEGER==INT_MIN checked in init.c
704 INTEGER(ans)[thisgrp] = INTEGER(x)[ix];
705 }
706 } else {
707 for (i=0; i<ngrp; i++) INTEGER(ans)[i] = NA_INTEGER;
708 for (i=0; i<n; i++) {
709 thisgrp = grp[i];
710 ix = (irowslen == -1) ? i : irows[i]-1;
711 if (INTEGER(x)[ix] == NA_INTEGER) continue;
712 if (INTEGER(ans)[thisgrp] == NA_INTEGER || INTEGER(x)[ix] < INTEGER(ans)[thisgrp])
713 INTEGER(ans)[thisgrp] = INTEGER(x)[ix];
714 }
715 for (i=0; i<ngrp; i++) {
716 if (INTEGER(ans)[i] == NA_INTEGER) {
717 warning(_("No non-missing values found in at least one group. Coercing to numeric type and returning 'Inf' for such groups to be consistent with base"));
718 ans = PROTECT(coerceVector(ans, REALSXP)); protecti++;
719 for (i=0; i<ngrp; i++) {
720 if (ISNA(REAL(ans)[i])) REAL(ans)[i] = R_PosInf;
721 }
722 break;
723 }
724 }
725 }
726 break;
727 case STRSXP:
728 ans = PROTECT(allocVector(STRSXP, ngrp)); protecti++;
729 if (!LOGICAL(narm)[0]) {
730 for (i=0; i<ngrp; i++) SET_STRING_ELT(ans, i, R_BlankString);
731 for (i=0; i<n; i++) {
732 thisgrp = grp[i];
733 ix = (irowslen == -1) ? i : irows[i]-1;
734 if (STRING_ELT(x, ix) == NA_STRING) {
735 SET_STRING_ELT(ans, thisgrp, NA_STRING);
736 } else {
737 if (STRING_ELT(ans, thisgrp) == R_BlankString ||
738 (STRING_ELT(ans, thisgrp) != NA_STRING && strcmp(CHAR(STRING_ELT(x, ix)), CHAR(STRING_ELT(ans, thisgrp))) < 0 )) {
739 SET_STRING_ELT(ans, thisgrp, STRING_ELT(x, ix));
740 }
741 }
742 }
743 } else {
744 for (i=0; i<ngrp; i++) SET_STRING_ELT(ans, i, NA_STRING);
745 for (i=0; i<n; i++) {
746 thisgrp = grp[i];
747 ix = (irowslen == -1) ? i : irows[i]-1;
748 if (STRING_ELT(x, ix) == NA_STRING) continue;
749 if (STRING_ELT(ans, thisgrp) == NA_STRING ||
750 strcmp(CHAR(STRING_ELT(x, ix)), CHAR(STRING_ELT(ans, thisgrp))) < 0) {
751 SET_STRING_ELT(ans, thisgrp, STRING_ELT(x, ix));
752 }
753 }
754 for (i=0; i<ngrp; i++) {
755 if (STRING_ELT(ans, i)==NA_STRING) {
756 warning(_("No non-missing values found in at least one group. Returning 'NA' for such groups to be consistent with base"));
757 break;
758 }
759 }
760 }
761 break;
762 case REALSXP:
763 ans = PROTECT(allocVector(REALSXP, ngrp)); protecti++;
764 if (!LOGICAL(narm)[0]) {
765 for (i=0; i<ngrp; i++) REAL(ans)[i] = R_PosInf;
766 for (i=0; i<n; i++) {
767 thisgrp = grp[i];
768 ix = (irowslen == -1) ? i : irows[i]-1;
769 if (ISNAN(REAL(x)[ix]) || REAL(x)[ix] < REAL(ans)[thisgrp])
770 REAL(ans)[thisgrp] = REAL(x)[ix];
771 }
772 } else {
773 for (i=0; i<ngrp; i++) REAL(ans)[i] = NA_REAL;
774 for (i=0; i<n; i++) {
775 thisgrp = grp[i];
776 ix = (irowslen == -1) ? i : irows[i]-1;
777 if (ISNAN(REAL(x)[ix])) continue;
778 if (ISNAN(REAL(ans)[thisgrp]) || REAL(x)[ix] < REAL(ans)[thisgrp])
779 REAL(ans)[thisgrp] = REAL(x)[ix];
780 }
781 for (i=0; i<ngrp; i++) {
782 if (ISNAN(REAL(ans)[i])) {
783 warning(_("No non-missing values found in at least one group. Returning 'Inf' for such groups to be consistent with base"));
784 for (; i<ngrp; i++) if (ISNAN(REAL(ans)[i])) REAL(ans)[i] = R_PosInf;
785 break;
786 }
787 }
788 }
789 break;
790 case CPLXSXP:
791 error(_("Type 'complex' has no well-defined min"));
792 break;
793 default:
794 error(_("Type '%s' not supported by GForce min (gmin). Either add the prefix base::min(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
795 }
796 copyMostAttrib(x, ans); // all but names,dim and dimnames. And if so, we want a copy here, not keepattr's SET_ATTRIB.
797 UNPROTECT(protecti); // ans + maybe 1 coerced ans
798 // Rprintf(_("this gmin took %8.3f\n"), 1.0*(clock()-start)/CLOCKS_PER_SEC);
799 return(ans);
800 }
801
802 // gmax
gmax(SEXP x,SEXP narm)803 SEXP gmax(SEXP x, SEXP narm)
804 {
805 if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
806 if (!isVectorAtomic(x)) error(_("GForce max can only be applied to columns, not .SD or similar. To find max of all items in a list such as .SD, either add the prefix base::max(.SD) or turn off GForce optimization using options(datatable.optimize=1). More likely, you may be looking for 'DT[,lapply(.SD,max),by=,.SDcols=]'"));
807 if (inherits(x, "factor") && !inherits(x, "ordered")) error(_("max is not meaningful for factors."));
808 R_len_t i, ix, thisgrp=0;
809 int n = (irowslen == -1) ? length(x) : irowslen;
810 //clock_t start = clock();
811 SEXP ans;
812 if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmax");
813
814 // TODO rework gmax in the same way as gmin and remove this *update
815 char *update = (char *)R_alloc(ngrp, sizeof(char));
816 for (int i=0; i<ngrp; i++) update[i] = 0;
817 int protecti=0;
818 switch(TYPEOF(x)) {
819 case LGLSXP: case INTSXP:
820 ans = PROTECT(allocVector(INTSXP, ngrp)); protecti++;
821 for (i=0; i<ngrp; i++) INTEGER(ans)[i] = 0;
822 if (!LOGICAL(narm)[0]) { // simple case - deal in a straightforward manner first
823 for (i=0; i<n; i++) {
824 thisgrp = grp[i];
825 ix = (irowslen == -1) ? i : irows[i]-1;
826 if (INTEGER(x)[ix] != NA_INTEGER && INTEGER(ans)[thisgrp] != NA_INTEGER) {
827 if ( update[thisgrp] != 1 || INTEGER(ans)[thisgrp] < INTEGER(x)[ix] ) {
828 INTEGER(ans)[thisgrp] = INTEGER(x)[ix];
829 if (update[thisgrp] != 1) update[thisgrp] = 1;
830 }
831 } else INTEGER(ans)[thisgrp] = NA_INTEGER;
832 }
833 } else {
834 for (i=0; i<n; i++) {
835 thisgrp = grp[i];
836 ix = (irowslen == -1) ? i : irows[i]-1;
837 if (INTEGER(x)[ix] != NA_INTEGER) {
838 if ( update[thisgrp] != 1 || INTEGER(ans)[thisgrp] < INTEGER(x)[ix] ) {
839 INTEGER(ans)[thisgrp] = INTEGER(x)[ix];
840 if (update[thisgrp] != 1) update[thisgrp] = 1;
841 }
842 } else {
843 if (update[thisgrp] != 1) {
844 INTEGER(ans)[thisgrp] = NA_INTEGER;
845 }
846 }
847 }
848 for (i=0; i<ngrp; i++) {
849 if (update[i] != 1) {// equivalent of INTEGER(ans)[thisgrp] == NA_INTEGER
850 warning(_("No non-missing values found in at least one group. Coercing to numeric type and returning 'Inf' for such groups to be consistent with base"));
851 ans = PROTECT(coerceVector(ans, REALSXP)); protecti++;
852 for (i=0; i<ngrp; i++) {
853 if (update[i] != 1) REAL(ans)[i] = -R_PosInf;
854 }
855 break;
856 }
857 }
858 }
859 break;
860 case STRSXP:
861 ans = PROTECT(allocVector(STRSXP, ngrp)); protecti++;
862 for (i=0; i<ngrp; i++) SET_STRING_ELT(ans, i, mkChar(""));
863 if (!LOGICAL(narm)[0]) { // simple case - deal in a straightforward manner first
864 for (i=0; i<n; i++) {
865 thisgrp = grp[i];
866 ix = (irowslen == -1) ? i : irows[i]-1;
867 if (STRING_ELT(x,ix) != NA_STRING && STRING_ELT(ans, thisgrp) != NA_STRING) {
868 if ( update[thisgrp] != 1 || strcmp(CHAR(STRING_ELT(ans, thisgrp)), CHAR(STRING_ELT(x,ix))) < 0 ) {
869 SET_STRING_ELT(ans, thisgrp, STRING_ELT(x, ix));
870 if (update[thisgrp] != 1) update[thisgrp] = 1;
871 }
872 } else SET_STRING_ELT(ans, thisgrp, NA_STRING);
873 }
874 } else {
875 for (i=0; i<n; i++) {
876 thisgrp = grp[i];
877 ix = (irowslen == -1) ? i : irows[i]-1;
878 if (STRING_ELT(x, ix) != NA_STRING) {
879 if ( update[thisgrp] != 1 || strcmp(CHAR(STRING_ELT(ans, thisgrp)), CHAR(STRING_ELT(x, ix))) < 0 ) {
880 SET_STRING_ELT(ans, thisgrp, STRING_ELT(x, ix));
881 if (update[thisgrp] != 1) update[thisgrp] = 1;
882 }
883 } else {
884 if (update[thisgrp] != 1) {
885 SET_STRING_ELT(ans, thisgrp, NA_STRING);
886 }
887 }
888 }
889 for (i=0; i<ngrp; i++) {
890 if (update[i] != 1) {// equivalent of INTEGER(ans)[thisgrp] == NA_INTEGER
891 warning(_("No non-missing values found in at least one group. Returning 'NA' for such groups to be consistent with base"));
892 break;
893 }
894 }
895 }
896 break;
897 case REALSXP:
898 ans = PROTECT(allocVector(REALSXP, ngrp)); protecti++;
899 for (i=0; i<ngrp; i++) REAL(ans)[i] = 0;
900 if (!LOGICAL(narm)[0]) {
901 for (i=0; i<n; i++) {
902 thisgrp = grp[i];
903 ix = (irowslen == -1) ? i : irows[i]-1;
904 if ( !ISNA(REAL(x)[ix]) && !ISNA(REAL(ans)[thisgrp]) ) {
905 if ( update[thisgrp] != 1 || REAL(ans)[thisgrp] < REAL(x)[ix] ||
906 (ISNAN(REAL(x)[ix]) && !ISNAN(REAL(ans)[thisgrp])) ) { // #1461
907 REAL(ans)[thisgrp] = REAL(x)[ix];
908 if (update[thisgrp] != 1) update[thisgrp] = 1;
909 }
910 } else REAL(ans)[thisgrp] = NA_REAL;
911 }
912 } else {
913 for (i=0; i<n; i++) {
914 thisgrp = grp[i];
915 ix = (irowslen == -1) ? i : irows[i]-1;
916 if ( !ISNAN(REAL(x)[ix]) ) { // #1461
917 if ( update[thisgrp] != 1 || REAL(ans)[thisgrp] < REAL(x)[ix] ) {
918 REAL(ans)[thisgrp] = REAL(x)[ix];
919 if (update[thisgrp] != 1) update[thisgrp] = 1;
920 }
921 } else {
922 if (update[thisgrp] != 1) {
923 REAL(ans)[thisgrp] = -R_PosInf;
924 }
925 }
926 }
927 // everything taken care of already. Just warn if all NA groups have occurred at least once
928 for (i=0; i<ngrp; i++) {
929 if (update[i] != 1) { // equivalent of REAL(ans)[thisgrp] == -R_PosInf
930 warning(_("No non-missing values found in at least one group. Returning '-Inf' for such groups to be consistent with base"));
931 break;
932 }
933 }
934 }
935 break;
936 case CPLXSXP:
937 error(_("Type 'complex' has no well-defined max"));
938 break;
939 default:
940 error(_("Type '%s' not supported by GForce max (gmax). Either add the prefix base::max(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
941 }
942 copyMostAttrib(x, ans); // all but names,dim and dimnames. And if so, we want a copy here, not keepattr's SET_ATTRIB.
943 UNPROTECT(protecti);
944 // Rprintf(_("this gmax took %8.3f\n"), 1.0*(clock()-start)/CLOCKS_PER_SEC);
945 return(ans);
946 }
947
948 // gmedian, always returns numeric type (to avoid as.numeric() wrap..)
gmedian(SEXP x,SEXP narmArg)949 SEXP gmedian(SEXP x, SEXP narmArg) {
950 if (!isLogical(narmArg) || LENGTH(narmArg)!=1 || LOGICAL(narmArg)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
951 if (!isVectorAtomic(x)) error(_("GForce median can only be applied to columns, not .SD or similar. To find median of all items in a list such as .SD, either add the prefix stats::median(.SD) or turn off GForce optimization using options(datatable.optimize=1). More likely, you may be looking for 'DT[,lapply(.SD,median),by=,.SDcols=]'"));
952 if (inherits(x, "factor")) error(_("median is not meaningful for factors."));
953 const bool isInt64 = INHERITS(x, char_integer64), narm = LOGICAL(narmArg)[0];
954 int n = (irowslen == -1) ? length(x) : irowslen;
955 if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmedian");
956 SEXP ans = PROTECT(allocVector(REALSXP, ngrp));
957 double *ansd = REAL(ans);
958 switch(TYPEOF(x)) {
959 case REALSXP: {
960 double *subd = REAL(PROTECT(allocVector(REALSXP, maxgrpn))); // allocate once upfront and reuse
961 int64_t *xi64 = (int64_t *)REAL(x);
962 double *xd = REAL(x);
963 for (int i=0; i<ngrp; ++i) {
964 int thisgrpsize = grpsize[i], nacount=0;
965 for (int j=0; j<thisgrpsize; ++j) {
966 int k = ff[i]+j-1;
967 if (isunsorted) k = oo[k]-1;
968 k = (irowslen == -1) ? k : irows[k]-1;
969 if (isInt64 ? xi64[k]==NA_INTEGER64 : ISNAN(xd[k])) nacount++;
970 else subd[j-nacount] = xd[k];
971 }
972 thisgrpsize -= nacount; // all-NA is returned as NA_REAL via n==0 case inside *quickselect
973 ansd[i] = (nacount && !narm) ? NA_REAL : (isInt64 ? i64quickselect((void *)subd, thisgrpsize) : dquickselect(subd, thisgrpsize));
974 }}
975 break;
976 case LGLSXP: case INTSXP: {
977 int *subi = INTEGER(PROTECT(allocVector(INTSXP, maxgrpn)));
978 int *xi = INTEGER(x);
979 for (int i=0; i<ngrp; i++) {
980 int thisgrpsize = grpsize[i], nacount=0;
981 for (int j=0; j<thisgrpsize; ++j) {
982 int k = ff[i]+j-1;
983 if (isunsorted) k = oo[k]-1;
984 k = (irowslen == -1) ? k : irows[k]-1;
985 if (xi[k]==NA_INTEGER) nacount++;
986 else subi[j-nacount] = xi[k];
987 }
988 ansd[i] = (nacount && !narm) ? NA_REAL : iquickselect(subi, thisgrpsize-nacount);
989 }}
990 break;
991 default:
992 error(_("Type '%s' not supported by GForce median (gmedian). Either add the prefix stats::median(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
993 }
994 if (!isInt64) copyMostAttrib(x, ans);
995 // else the integer64 class needs to be dropped since double is always returned by gmedian
996 UNPROTECT(2); // ans, subd|subi
997 return ans;
998 }
999
glast(SEXP x)1000 SEXP glast(SEXP x) {
1001
1002 R_len_t i,k;
1003 int n = (irowslen == -1) ? length(x) : irowslen;
1004 SEXP ans;
1005 if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gtail");
1006 switch(TYPEOF(x)) {
1007 case LGLSXP: {
1008 const int *ix = LOGICAL(x);
1009 ans = PROTECT(allocVector(LGLSXP, ngrp));
1010 int *ians = LOGICAL(ans);
1011 for (i=0; i<ngrp; i++) {
1012 k = ff[i]+grpsize[i]-2;
1013 if (isunsorted) k = oo[k]-1;
1014 k = (irowslen == -1) ? k : irows[k]-1;
1015 ians[i] = ix[k];
1016 }
1017 }
1018 break;
1019 case INTSXP: {
1020 const int *ix = INTEGER(x);
1021 ans = PROTECT(allocVector(INTSXP, ngrp));
1022 int *ians = INTEGER(ans);
1023 for (i=0; i<ngrp; i++) {
1024 k = ff[i]+grpsize[i]-2;
1025 if (isunsorted) k = oo[k]-1;
1026 k = (irowslen == -1) ? k : irows[k]-1;
1027 ians[i] = ix[k];
1028 }
1029 }
1030 break;
1031 case REALSXP: {
1032 const double *dx = REAL(x);
1033 ans = PROTECT(allocVector(REALSXP, ngrp));
1034 double *dans = REAL(ans);
1035 for (i=0; i<ngrp; i++) {
1036 k = ff[i]+grpsize[i]-2;
1037 if (isunsorted) k = oo[k]-1;
1038 k = (irowslen == -1) ? k : irows[k]-1;
1039 dans[i] = dx[k];
1040 }
1041 }
1042 break;
1043 case CPLXSXP: {
1044 const Rcomplex *dx = COMPLEX(x);
1045 ans = PROTECT(allocVector(CPLXSXP, ngrp));
1046 Rcomplex *dans = COMPLEX(ans);
1047 for (i=0; i<ngrp; i++) {
1048 k = ff[i]+grpsize[i]-2;
1049 if (isunsorted) k = oo[k]-1;
1050 k = (irowslen == -1) ? k : irows[k]-1;
1051 dans[i] = dx[k];
1052 }
1053 } break;
1054 case STRSXP:
1055 ans = PROTECT(allocVector(STRSXP, ngrp));
1056 for (i=0; i<ngrp; i++) {
1057 k = ff[i]+grpsize[i]-2;
1058 if (isunsorted) k = oo[k]-1;
1059 k = (irowslen == -1) ? k : irows[k]-1;
1060 SET_STRING_ELT(ans, i, STRING_ELT(x, k));
1061 }
1062 break;
1063 case VECSXP:
1064 ans = PROTECT(allocVector(VECSXP, ngrp));
1065 for (i=0; i<ngrp; i++) {
1066 k = ff[i]+grpsize[i]-2;
1067 if (isunsorted) k = oo[k]-1;
1068 k = (irowslen == -1) ? k : irows[k]-1;
1069 SET_VECTOR_ELT(ans, i, VECTOR_ELT(x, k));
1070 }
1071 break;
1072 default:
1073 error(_("Type '%s' not supported by GForce tail (gtail). Either add the prefix utils::tail(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
1074 }
1075 copyMostAttrib(x, ans);
1076 UNPROTECT(1);
1077 return(ans);
1078 }
1079
gfirst(SEXP x)1080 SEXP gfirst(SEXP x) {
1081
1082 R_len_t i,k;
1083 int n = (irowslen == -1) ? length(x) : irowslen;
1084 SEXP ans;
1085 if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "ghead");
1086 switch(TYPEOF(x)) {
1087 case LGLSXP: {
1088 int const *ix = LOGICAL(x);
1089 ans = PROTECT(allocVector(LGLSXP, ngrp));
1090 int *ians = LOGICAL(ans);
1091 for (i=0; i<ngrp; i++) {
1092 k = ff[i]-1;
1093 if (isunsorted) k = oo[k]-1;
1094 k = (irowslen == -1) ? k : irows[k]-1;
1095 ians[i] = ix[k];
1096 }
1097 }
1098 break;
1099 case INTSXP: {
1100 const int *ix = INTEGER(x);
1101 ans = PROTECT(allocVector(INTSXP, ngrp));
1102 int *ians = INTEGER(ans);
1103 for (i=0; i<ngrp; i++) {
1104 k = ff[i]-1;
1105 if (isunsorted) k = oo[k]-1;
1106 k = (irowslen == -1) ? k : irows[k]-1;
1107 ians[i] = ix[k];
1108 }
1109 }
1110 break;
1111 case REALSXP: {
1112 const double *dx = REAL(x);
1113 ans = PROTECT(allocVector(REALSXP, ngrp));
1114 double *dans = REAL(ans);
1115 for (i=0; i<ngrp; i++) {
1116 k = ff[i]-1;
1117 if (isunsorted) k = oo[k]-1;
1118 k = (irowslen == -1) ? k : irows[k]-1;
1119 dans[i] = dx[k];
1120 }
1121 }
1122 break;
1123 case CPLXSXP: {
1124 const Rcomplex *dx = COMPLEX(x);
1125 ans = PROTECT(allocVector(CPLXSXP, ngrp));
1126 Rcomplex *dans = COMPLEX(ans);
1127 for (i=0; i<ngrp; i++) {
1128 k = ff[i]-1;
1129 if (isunsorted) k = oo[k]-1;
1130 k = (irowslen == -1) ? k : irows[k]-1;
1131 dans[i] = dx[k];
1132 }
1133 } break;
1134 case STRSXP:
1135 ans = PROTECT(allocVector(STRSXP, ngrp));
1136 for (i=0; i<ngrp; i++) {
1137 k = ff[i]-1;
1138 if (isunsorted) k = oo[k]-1;
1139 k = (irowslen == -1) ? k : irows[k]-1;
1140 SET_STRING_ELT(ans, i, STRING_ELT(x, k));
1141 }
1142 break;
1143 case VECSXP:
1144 ans = PROTECT(allocVector(VECSXP, ngrp));
1145 for (i=0; i<ngrp; i++) {
1146 k = ff[i]-1;
1147 if (isunsorted) k = oo[k]-1;
1148 k = (irowslen == -1) ? k : irows[k]-1;
1149 SET_VECTOR_ELT(ans, i, VECTOR_ELT(x, k));
1150 }
1151 break;
1152 default:
1153 error(_("Type '%s' not supported by GForce head (ghead). Either add the prefix utils::head(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
1154 }
1155 copyMostAttrib(x, ans);
1156 UNPROTECT(1);
1157 return(ans);
1158 }
1159
gtail(SEXP x,SEXP valArg)1160 SEXP gtail(SEXP x, SEXP valArg) {
1161 if (!isInteger(valArg) || LENGTH(valArg)!=1 || INTEGER(valArg)[0]!=1) error(_("Internal error, gtail is only implemented for n=1. This should have been caught before. please report to data.table issue tracker.")); // # nocov
1162 return (glast(x));
1163 }
1164
ghead(SEXP x,SEXP valArg)1165 SEXP ghead(SEXP x, SEXP valArg) {
1166 if (!isInteger(valArg) || LENGTH(valArg)!=1 || INTEGER(valArg)[0]!=1) error(_("Internal error, ghead is only implemented for n=1. This should have been caught before. please report to data.table issue tracker.")); // # nocov
1167 return (gfirst(x));
1168 }
1169
gnthvalue(SEXP x,SEXP valArg)1170 SEXP gnthvalue(SEXP x, SEXP valArg) {
1171
1172 if (!isInteger(valArg) || LENGTH(valArg)!=1 || INTEGER(valArg)[0]<=0) error(_("Internal error, `g[` (gnthvalue) is only implemented single value subsets with positive index, e.g., .SD[2]. This should have been caught before. please report to data.table issue tracker.")); // # nocov
1173 R_len_t i,k, val=INTEGER(valArg)[0];
1174 int n = (irowslen == -1) ? length(x) : irowslen;
1175 SEXP ans;
1176 if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "ghead");
1177 switch(TYPEOF(x)) {
1178 case LGLSXP: {
1179 const int *ix = LOGICAL(x);
1180 ans = PROTECT(allocVector(LGLSXP, ngrp));
1181 int *ians = LOGICAL(ans);
1182 for (i=0; i<ngrp; i++) {
1183 if (val > grpsize[i]) { LOGICAL(ans)[i] = NA_LOGICAL; continue; }
1184 k = ff[i]+val-2;
1185 if (isunsorted) k = oo[k]-1;
1186 k = (irowslen == -1) ? k : irows[k]-1;
1187 ians[i] = ix[k];
1188 }
1189 }
1190 break;
1191 case INTSXP: {
1192 const int *ix = INTEGER(x);
1193 ans = PROTECT(allocVector(INTSXP, ngrp));
1194 int *ians = INTEGER(ans);
1195 for (i=0; i<ngrp; i++) {
1196 if (val > grpsize[i]) { INTEGER(ans)[i] = NA_INTEGER; continue; }
1197 k = ff[i]+val-2;
1198 if (isunsorted) k = oo[k]-1;
1199 k = (irowslen == -1) ? k : irows[k]-1;
1200 ians[i] = ix[k];
1201 }
1202 }
1203 break;
1204 case REALSXP: {
1205 const double *dx = REAL(x);
1206 ans = PROTECT(allocVector(REALSXP, ngrp));
1207 double *dans = REAL(ans);
1208 for (i=0; i<ngrp; i++) {
1209 if (val > grpsize[i]) { REAL(ans)[i] = NA_REAL; continue; }
1210 k = ff[i]+val-2;
1211 if (isunsorted) k = oo[k]-1;
1212 k = (irowslen == -1) ? k : irows[k]-1;
1213 dans[i] = dx[k];
1214 }
1215 }
1216 break;
1217 case CPLXSXP: {
1218 const Rcomplex *dx = COMPLEX(x);
1219 ans = PROTECT(allocVector(CPLXSXP, ngrp));
1220 Rcomplex *dans = COMPLEX(ans);
1221 for (i=0; i<ngrp; i++) {
1222 if (val > grpsize[i]) { dans[i].r = NA_REAL; dans[i].i = NA_REAL; continue; }
1223 k = ff[i]+val-2;
1224 if (isunsorted) k = oo[k]-1;
1225 k = (irowslen == -1) ? k : irows[k]-1;
1226 dans[i] = dx[k];
1227 }
1228 } break;
1229 case STRSXP:
1230 ans = PROTECT(allocVector(STRSXP, ngrp));
1231 for (i=0; i<ngrp; i++) {
1232 if (val > grpsize[i]) { SET_STRING_ELT(ans, i, NA_STRING); continue; }
1233 k = ff[i]+val-2;
1234 if (isunsorted) k = oo[k]-1;
1235 k = (irowslen == -1) ? k : irows[k]-1;
1236 SET_STRING_ELT(ans, i, STRING_ELT(x, k));
1237 }
1238 break;
1239 case VECSXP:
1240 ans = PROTECT(allocVector(VECSXP, ngrp));
1241 for (i=0; i<ngrp; i++) {
1242 if (val > grpsize[i]) { SET_VECTOR_ELT(ans, i, R_NilValue); continue; }
1243 k = ff[i]+val-2;
1244 if (isunsorted) k = oo[k]-1;
1245 k = (irowslen == -1) ? k : irows[k]-1;
1246 SET_VECTOR_ELT(ans, i, VECTOR_ELT(x, k));
1247 }
1248 break;
1249 default:
1250 error(_("Type '%s' not supported by GForce subset `[` (gnthvalue). Either add the prefix utils::head(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
1251 }
1252 copyMostAttrib(x, ans);
1253 UNPROTECT(1);
1254 return(ans);
1255 }
1256
1257 // TODO: gwhich.min, gwhich.max
1258 // implemented this similar to gmedian to balance well between speed and memory usage. There's one extra allocation on maximum groups and that's it.. and that helps speed things up extremely since we don't have to collect x's values for each group for each step (mean, residuals, mean again and then variance).
gvarsd1(SEXP x,SEXP narm,Rboolean isSD)1259 SEXP gvarsd1(SEXP x, SEXP narm, Rboolean isSD)
1260 {
1261 if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
1262 if (!isVectorAtomic(x)) error(_("GForce var/sd can only be applied to columns, not .SD or similar. For the full covariance matrix of all items in a list such as .SD, either add the prefix stats::var(.SD) (or stats::sd(.SD)) or turn off GForce optimization using options(datatable.optimize=1). Alternatively, if you only need the diagonal elements, 'DT[,lapply(.SD,var),by=,.SDcols=]' is the optimized way to do this."));
1263 if (inherits(x, "factor")) error(_("var/sd is not meaningful for factors."));
1264 long double m, s, v;
1265 R_len_t i, j, ix, thisgrpsize = 0, n = (irowslen == -1) ? length(x) : irowslen;
1266 if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gvar");
1267 SEXP sub, ans = PROTECT(allocVector(REALSXP, ngrp));
1268 Rboolean ans_na;
1269 switch(TYPEOF(x)) {
1270 case LGLSXP: case INTSXP:
1271 sub = PROTECT(allocVector(INTSXP, maxgrpn)); // allocate once upfront
1272 if (!LOGICAL(narm)[0]) {
1273 for (i=0; i<ngrp; i++) {
1274 m=0.; s=0.; v=0.; ans_na = FALSE;
1275 if (grpsize[i] != 1) {
1276 thisgrpsize = grpsize[i];
1277 SETLENGTH(sub, thisgrpsize); // to gather this group's data
1278 for (j=0; j<thisgrpsize; j++) {
1279 ix = ff[i]+j-1;
1280 if (isunsorted) ix = oo[ix]-1;
1281 ix = (irowslen == -1) ? ix : irows[ix]-1;
1282 if (INTEGER(x)[ix] == NA_INTEGER) { ans_na = TRUE; break; }
1283 INTEGER(sub)[j] = INTEGER(x)[ix];
1284 m += INTEGER(sub)[j]; // sum
1285 }
1286 if (ans_na) { REAL(ans)[i] = NA_REAL; continue; }
1287 m = m/thisgrpsize; // mean, first pass
1288 for (j=0; j<thisgrpsize; j++) s += (INTEGER(sub)[j]-m); // residuals
1289 m += (s/thisgrpsize); // mean, second pass
1290 for (j=0; j<thisgrpsize; j++) { // variance
1291 v += (INTEGER(sub)[j]-(double)m) * (INTEGER(sub)[j]-(double)m);
1292 }
1293 REAL(ans)[i] = (double)v/(thisgrpsize-1);
1294 if (isSD) REAL(ans)[i] = SQRTL(REAL(ans)[i]);
1295 } else REAL(ans)[i] = NA_REAL;
1296 }
1297 } else {
1298 for (i=0; i<ngrp; i++) {
1299 m=0.; s=0.; v=0.; thisgrpsize = 0;
1300 if (grpsize[i] != 1) {
1301 SETLENGTH(sub, grpsize[i]); // to gather this group's data
1302 for (j=0; j<grpsize[i]; j++) {
1303 ix = ff[i]+j-1;
1304 if (isunsorted) ix = oo[ix]-1;
1305 ix = (irowslen == -1) ? ix : irows[ix]-1;
1306 if (INTEGER(x)[ix] == NA_INTEGER) continue;
1307 INTEGER(sub)[thisgrpsize] = INTEGER(x)[ix];
1308 m += INTEGER(sub)[thisgrpsize]; // sum
1309 thisgrpsize++;
1310 }
1311 if (thisgrpsize <= 1) { REAL(ans)[i] = NA_REAL; continue; }
1312 m = m/thisgrpsize; // mean, first pass
1313 for (j=0; j<thisgrpsize; j++) s += (INTEGER(sub)[j]-m); // residuals
1314 m += (s/thisgrpsize); // mean, second pass
1315 for (j=0; j<thisgrpsize; j++) { // variance
1316 v += (INTEGER(sub)[j]-(double)m) * (INTEGER(sub)[j]-(double)m);
1317 }
1318 REAL(ans)[i] = (double)v/(thisgrpsize-1);
1319 if (isSD) REAL(ans)[i] = SQRTL(REAL(ans)[i]);
1320 } else REAL(ans)[i] = NA_REAL;
1321 }
1322 }
1323 SETLENGTH(sub, maxgrpn);
1324 break;
1325 case REALSXP:
1326 sub = PROTECT(allocVector(REALSXP, maxgrpn)); // allocate once upfront
1327 if (!LOGICAL(narm)[0]) {
1328 for (i=0; i<ngrp; i++) {
1329 m=0.; s=0.; v=0.; ans_na = FALSE;
1330 if (grpsize[i] != 1) {
1331 thisgrpsize = grpsize[i];
1332 SETLENGTH(sub, thisgrpsize); // to gather this group's data
1333 for (j=0; j<thisgrpsize; j++) {
1334 ix = ff[i]+j-1;
1335 if (isunsorted) ix = oo[ix]-1;
1336 ix = (irowslen == -1) ? ix : irows[ix]-1;
1337 if (ISNAN(REAL(x)[ix])) { ans_na = TRUE; break; }
1338 REAL(sub)[j] = REAL(x)[ix];
1339 m += REAL(sub)[j]; // sum
1340 }
1341 if (ans_na) { REAL(ans)[i] = NA_REAL; continue; }
1342 m = m/thisgrpsize; // mean, first pass
1343 for (j=0; j<thisgrpsize; j++) s += (REAL(sub)[j]-m); // residuals
1344 m += (s/thisgrpsize); // mean, second pass
1345 for (j=0; j<thisgrpsize; j++) { // variance
1346 v += (REAL(sub)[j]-(double)m) * (REAL(sub)[j]-(double)m);
1347 }
1348 REAL(ans)[i] = (double)v/(thisgrpsize-1);
1349 if (isSD) REAL(ans)[i] = SQRTL(REAL(ans)[i]);
1350 } else REAL(ans)[i] = NA_REAL;
1351 }
1352 } else {
1353 for (i=0; i<ngrp; i++) {
1354 m=0.; s=0.; v=0.; thisgrpsize = 0;
1355 if (grpsize[i] != 1) {
1356 SETLENGTH(sub, grpsize[i]); // to gather this group's data
1357 for (j=0; j<grpsize[i]; j++) {
1358 ix = ff[i]+j-1;
1359 if (isunsorted) ix = oo[ix]-1;
1360 ix = (irowslen == -1) ? ix : irows[ix]-1;
1361 if (ISNAN(REAL(x)[ix])) continue;
1362 REAL(sub)[thisgrpsize] = REAL(x)[ix];
1363 m += REAL(sub)[thisgrpsize]; // sum
1364 thisgrpsize++;
1365 }
1366 if (thisgrpsize <= 1) { REAL(ans)[i] = NA_REAL; continue; }
1367 m = m/thisgrpsize; // mean, first pass
1368 for (j=0; j<thisgrpsize; j++) s += (REAL(sub)[j]-m); // residuals
1369 m += (s/thisgrpsize); // mean, second pass
1370 for (j=0; j<thisgrpsize; j++) { // variance
1371 v += (REAL(sub)[j]-(double)m) * (REAL(sub)[j]-(double)m);
1372 }
1373 REAL(ans)[i] = (double)v/(thisgrpsize-1);
1374 if (isSD) REAL(ans)[i] = SQRTL(REAL(ans)[i]);
1375 } else REAL(ans)[i] = NA_REAL;
1376 }
1377 }
1378 SETLENGTH(sub, maxgrpn);
1379 break;
1380 default:
1381 if (!isSD) {
1382 error(_("Type '%s' not supported by GForce var (gvar). Either add the prefix stats::var(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
1383 } else {
1384 error(_("Type '%s' not supported by GForce sd (gsd). Either add the prefix stats::sd(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
1385 }
1386 }
1387 // no copyMostAttrib(x, ans) since class (e.g. Date) unlikely applicable to sd/var
1388 UNPROTECT(2);
1389 return (ans);
1390 }
1391
gvar(SEXP x,SEXP narm)1392 SEXP gvar(SEXP x, SEXP narm) {
1393 return (gvarsd1(x, narm, FALSE));
1394 }
1395
gsd(SEXP x,SEXP narm)1396 SEXP gsd(SEXP x, SEXP narm) {
1397 return (gvarsd1(x, narm, TRUE));
1398 }
1399
gprod(SEXP x,SEXP narm)1400 SEXP gprod(SEXP x, SEXP narm)
1401 {
1402 if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
1403 if (!isVectorAtomic(x)) error(_("GForce prod can only be applied to columns, not .SD or similar. To multiply all items in a list such as .SD, either add the prefix base::prod(.SD) or turn off GForce optimization using options(datatable.optimize=1). More likely, you may be looking for 'DT[,lapply(.SD,prod),by=,.SDcols=]'"));
1404 if (inherits(x, "factor")) error(_("prod is not meaningful for factors."));
1405 int i, ix, thisgrp;
1406 int n = (irowslen == -1) ? length(x) : irowslen;
1407 //clock_t start = clock();
1408 SEXP ans;
1409 if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gprod");
1410 long double *s = malloc(ngrp * sizeof(long double));
1411 if (!s) error(_("Unable to allocate %d * %d bytes for gprod"), ngrp, sizeof(long double));
1412 for (i=0; i<ngrp; i++) s[i] = 1.0;
1413 ans = PROTECT(allocVector(REALSXP, ngrp));
1414 switch(TYPEOF(x)) {
1415 case LGLSXP: case INTSXP:
1416 for (i=0; i<n; i++) {
1417 thisgrp = grp[i];
1418 ix = (irowslen == -1) ? i : irows[i]-1;
1419 if(INTEGER(x)[ix] == NA_INTEGER) {
1420 if (!LOGICAL(narm)[0]) s[thisgrp] = NA_REAL; // Let NA_REAL propogate from here. R_NaReal is IEEE.
1421 continue;
1422 }
1423 s[thisgrp] *= INTEGER(x)[ix]; // no under/overflow here, s is long double (like base)
1424 }
1425 for (i=0; i<ngrp; i++) {
1426 if (s[i] > DBL_MAX) REAL(ans)[i] = R_PosInf;
1427 else if (s[i] < -DBL_MAX) REAL(ans)[i] = R_NegInf;
1428 else REAL(ans)[i] = (double)s[i];
1429 }
1430 break;
1431 case REALSXP:
1432 for (i=0; i<n; i++) {
1433 thisgrp = grp[i];
1434 ix = (irowslen == -1) ? i : irows[i]-1;
1435 if(ISNAN(REAL(x)[ix]) && LOGICAL(narm)[0]) continue; // else let NA_REAL propogate from here
1436 s[thisgrp] *= REAL(x)[ix]; // done in long double, like base
1437 }
1438 for (i=0; i<ngrp; i++) {
1439 if (s[i] > DBL_MAX) REAL(ans)[i] = R_PosInf;
1440 else if (s[i] < -DBL_MAX) REAL(ans)[i] = R_NegInf;
1441 else REAL(ans)[i] = (double)s[i];
1442 }
1443 break;
1444 default:
1445 free(s);
1446 error(_("Type '%s' not supported by GForce prod (gprod). Either add the prefix base::prod(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
1447 }
1448 free(s);
1449 copyMostAttrib(x, ans);
1450 UNPROTECT(1);
1451 // Rprintf(_("this gprod took %8.3f\n"), 1.0*(clock()-start)/CLOCKS_PER_SEC);
1452 return(ans);
1453 }
1454