1 #include "data.table.h"
2 //#include <time.h>
3 
4 static int ngrp = 0;         // number of groups
5 static int *grpsize = NULL;  // size of each group, used by gmean (and gmedian) not gsum
6 static int nrow = 0;         // length of underlying x; same as length(ghigh) and length(glow)
7 static int *irows;           // GForce support for subsets in 'i' (TODO: joins in 'i')
8 static int irowslen = -1;    // -1 is for irows = NULL
9 static uint16_t *high=NULL, *low=NULL;  // the group of each x item; a.k.a. which-group-am-I
10 static int *restrict grp;    // TODO: eventually this can be made local for gforce as won't be needed globally when all functions here use gather
11 static size_t highSize;
12 static int shift, mask;
13 static char *gx=NULL;
14 
15 static size_t nBatch, batchSize, lastBatchSize;
16 static int *counts, *tmpcounts;
17 
18 // for gmedian
19 static int maxgrpn = 0;
20 static int *oo = NULL;
21 static int *ff = NULL;
22 static int isunsorted = 0;
23 
24 // from R's src/cov.c (for variance / sd)
25 #ifdef HAVE_LONG_DOUBLE
26 # define SQRTL sqrtl
27 #else
28 # define SQRTL sqrt
29 #endif
30 
nbit(int n)31 static int nbit(int n)
32 {
33   // returns position of biggest bit; i.e. floor(log2(n))+1 without using fpa
34   // not needed to be fast. Just a helper function.
35   int nb=0;
36   while (n) { nb++; n>>=1; }
37   return nb;
38 }
39 
gforce(SEXP env,SEXP jsub,SEXP o,SEXP f,SEXP l,SEXP irowsArg)40 SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) {
41   double started = wallclock();
42   const bool verbose = GetVerbose();
43   if (TYPEOF(env) != ENVSXP) error(_("env is not an environment"));
44   // The type of jsub is pretty flexbile in R, so leave checking to eval() below.
45   if (!isInteger(o)) error(_("%s is not an integer vector"), "o");
46   if (!isInteger(f)) error(_("%s is not an integer vector"), "f");
47   if (!isInteger(l)) error(_("%s is not an integer vector"), "l");
48   if (isNull(irowsArg)) {
49     irows = NULL;
50     irowslen = -1;
51   }
52   else if (isInteger(irowsArg)) {
53     irows = INTEGER(irowsArg);
54     irowslen = LENGTH(irowsArg);
55   }
56   else error(_("irowsArg is neither an integer vector nor NULL"));  // # nocov
57   ngrp = LENGTH(l);
58   if (LENGTH(f) != ngrp) error(_("length(f)=%d != length(l)=%d"), LENGTH(f), ngrp);
59   nrow=0;
60   grpsize = INTEGER(l);
61   maxgrpn = 0;
62   for (int i=0; i<ngrp; i++) {
63     nrow+=grpsize[i];
64     if (grpsize[i]>maxgrpn) maxgrpn = grpsize[i];  // old comment to be checked: 'needed for #2046 and #2111 when maxgrpn attribute is not attached to empty o'
65   }
66   if (LENGTH(o) && LENGTH(o)!=nrow) error(_("o has length %d but sum(l)=%d"), LENGTH(o), nrow);
67   {
68     SEXP tt = getAttrib(o, install("maxgrpn"));
69     if (length(tt)==1 && INTEGER(tt)[0]!=maxgrpn) error(_("Internal error: o's maxgrpn attribute mismatches recalculated maxgrpn")); // # nocov
70   }
71 
72   int nb = nbit(ngrp-1);
73   shift = nb/2;    // /2 so that high and low can be uint16_t, and no limit (even for nb=4) to stress-test.
74   // shift=MAX(nb-8,0); if (shift>16) shift=nb/2;     // TODO: when we have stress-test off mode, do this
75   mask = (1<<shift)-1;
76   highSize = ((ngrp-1)>>shift) + 1;
77 
78   grp = (int *)R_alloc(nrow, sizeof(int));   // TODO: use malloc and made this local as not needed globally when all functions here use gather
79                                              // maybe better to malloc to avoid R's heap. This grp isn't global, so it doesn't need to be R_alloc
80   const int *restrict fp = INTEGER(f);
81 
82   nBatch = MIN((nrow+1)/2, getDTthreads(nrow, true)*2);  // *2 to reduce last-thread-home. TODO: experiment. The higher this is though, the bigger is counts[]
83   batchSize = MAX(1, (nrow-1)/nBatch);
84   lastBatchSize = nrow - (nBatch-1)*batchSize;
85   // We deliberate use, for example, 40 batches of just 14 rows, to stress-test tests. This strategy proved to be a good one as #3204 immediately came to light.
86   // TODO: enable stress-test mode in tests only (#3205) which can be turned off by default in release to decrease overhead on small data
87   //       if that is established to be biting (it may be fine).
88   if (nBatch<1 || batchSize<1 || lastBatchSize<1) {
89     error(_("Internal error: nrow=%d  ngrp=%d  nbit=%d  shift=%d  highSize=%d  nBatch=%d  batchSize=%d  lastBatchSize=%d\n"),  // # nocov
90            nrow, ngrp, nb, shift, highSize, nBatch, batchSize, lastBatchSize);                                              // # nocov
91   }
92   // initial population of g:
93   #pragma omp parallel for num_threads(getDTthreads(ngrp, false))
94   for (int g=0; g<ngrp; g++) {
95     int *elem = grp + fp[g]-1;
96     for (int j=0; j<grpsize[g]; j++)  elem[j] = g;
97   }
98   if (verbose) { Rprintf(_("gforce initial population of grp took %.3f\n"), wallclock()-started); started=wallclock(); }
99   isunsorted = 0;
100   if (LENGTH(o)) {
101     isunsorted = 1; // for gmedian
102 
103     // What follows is more cache-efficient version of this scattered assign :
104     // for (int g=0; g<ngrp; g++) {
105     //  const int *elem = op + fp[g]-1;
106     //  for (int j=0; j<grpsize[g]; j++)  grp[ elem[j]-1 ] = g;
107     //}
108 
109     const int *restrict op = INTEGER(o);  // o is a permutation of 1:nrow
110     int nb = nbit(nrow-1);
111     int shift = MAX(nb-8, 0);  // TODO: experiment nb/2.  Here it doesn't have to be /2 currently.
112     int highSize = ((nrow-1)>>shift) + 1;
113     //Rprintf(_("When assigning grp[o] = g, highSize=%d  nb=%d  shift=%d  nBatch=%d\n"), highSize, nb, shift, nBatch);
114     int *counts = calloc(nBatch*highSize, sizeof(int));  // TODO: cache-line align and make highSize a multiple of 64
115     int *TMP   = malloc(nrow*2l*sizeof(int)); // must multiple the long int otherwise overflow may happen, #4295
116     if (!counts || !TMP ) error(_("Internal error: Failed to allocate counts or TMP when assigning g in gforce"));
117     #pragma omp parallel for num_threads(getDTthreads(nBatch, false))   // schedule(dynamic,1)
118     for (int b=0; b<nBatch; b++) {
119       const int howMany = b==nBatch-1 ? lastBatchSize : batchSize;
120       const int *my_o = op + b*batchSize;
121       int *restrict my_counts = counts + b*highSize;
122       for (int i=0; i<howMany; i++) {
123         const int w = (my_o[i]-1) >> shift;
124         my_counts[w]++;
125       }
126       for (int i=0, cum=0; i<highSize; i++) {
127         int tmp = my_counts[i];
128         my_counts[i] = cum;
129         cum += tmp;
130       }
131       const int *restrict my_g = grp + b*batchSize;
132       int *restrict my_tmp = TMP + b*2*batchSize;
133       for (int i=0; i<howMany; i++) {
134         const int w = (my_o[i]-1) >> shift;   // could use my_high but may as well use my_pg since we need my_pg anyway for the lower bits next too
135         int *p = my_tmp + 2*my_counts[w]++;
136         *p++ = my_o[i]-1;
137         *p   = my_g[i];
138       }
139     }
140     //Rprintf(_("gforce assign TMP (o,g) pairs took %.3f\n"), wallclock()-started); started=wallclock();
141     #pragma omp parallel for num_threads(getDTthreads(highSize, false))
142     for (int h=0; h<highSize; h++) {  // very important that high is first loop here
143       for (int b=0; b<nBatch; b++) {
144         const int start = h==0 ? 0 : counts[ b*highSize + h - 1 ];
145         const int end   = counts[ b*highSize + h ];
146         const int *restrict p = TMP + b*2*batchSize + start*2;
147         for (int k=start; k<end; k++, p+=2) {
148           grp[p[0]] = p[1];  // TODO: could write high here, and initial low.   ** If so, same in initial population when o is missing **
149         }
150       }
151     }
152     free(counts);
153     free(TMP);
154     //Rprintf(_("gforce assign TMP [ (o,g) pairs ] back to grp took %.3f\n"), wallclock()-started); started=wallclock();
155   }
156 
157   high = (uint16_t *)R_alloc(nrow, sizeof(uint16_t));  // maybe better to malloc to avoid R's heap, but safer to R_alloc since it's done via eval()
158   low  = (uint16_t *)R_alloc(nrow, sizeof(uint16_t));
159   // global ghigh and glow because the g* functions (inside jsub) share this common memory
160 
161   gx = (char *)R_alloc(nrow, sizeof(Rcomplex));  // enough for a copy of one column (or length(irows) if supplied)
162   // TODO: reduce to the largest type present; won't be faster (untouched RAM won't be fetched) but it will increase the largest size that works.
163 
164   counts = (int *)S_alloc(nBatch*highSize, sizeof(int));  // (S_ zeros) TODO: cache-line align and make highSize a multiple of 64
165   tmpcounts = (int *)R_alloc(getDTthreads(nBatch, false)*highSize, sizeof(int));
166 
167   const int *restrict gp = grp;
168   #pragma omp parallel for num_threads(getDTthreads(nBatch, false))   // schedule(dynamic,1)
169   for (int b=0; b<nBatch; b++) {
170     int *restrict my_counts = counts + b*highSize;
171     uint16_t *restrict my_high = high + b*batchSize;
172     const int *my_pg = gp + b*batchSize;
173     const int howMany = b==nBatch-1 ? lastBatchSize : batchSize;
174     for (int i=0; i<howMany; i++) {
175       const int w = my_pg[i] >> shift;
176       my_counts[w]++;
177       my_high[i] = (uint16_t)w;  // reduce 4 bytes to 2
178     }
179     for (int i=0, cum=0; i<highSize; i++) {
180       int tmp = my_counts[i];
181       my_counts[i] = cum;
182       cum += tmp;
183     }
184     uint16_t *restrict my_low = low + b*batchSize;
185     int *restrict my_tmpcounts = tmpcounts + omp_get_thread_num()*highSize;
186     memcpy(my_tmpcounts, my_counts, highSize*sizeof(int));
187     for (int i=0; i<howMany; i++) {
188       const int w = my_pg[i] >> shift;   // could use my_high but may as well use my_pg since we need my_pg anyway for the lower bits next too
189       my_low[my_tmpcounts[w]++] = (uint16_t)(my_pg[i] & mask);
190     }
191     // counts is now cumulated within batch (with ending values) and we leave it that way
192     // memcpy(counts + b*256, myCounts, 256*sizeof(int));  // save cumulate for later, first bucket contains position of next. For ease later in the very last batch.
193   }
194   if (verbose) { Rprintf(_("gforce assign high and low took %.3f\n"), wallclock()-started); started=wallclock(); }
195 
196   oo = INTEGER(o);
197   ff = INTEGER(f);
198 
199   SEXP ans = PROTECT( eval(jsub, env) );
200   if (verbose) { Rprintf(_("gforce eval took %.3f\n"), wallclock()-started); started=wallclock(); }
201   // if this eval() fails with R error, R will release grp for us. Which is why we use R_alloc above.
202   if (isVectorAtomic(ans)) {
203     SEXP tt = PROTECT(allocVector(VECSXP, 1));
204     SET_VECTOR_ELT(tt, 0, ans);
205     UNPROTECT(2);
206     return tt;
207   }
208   UNPROTECT(1);
209   return ans;
210 }
211 
gather(SEXP x,bool * anyNA)212 void *gather(SEXP x, bool *anyNA)
213 {
214   double started=wallclock();
215   const bool verbose = GetVerbose();
216   if (verbose) Rprintf(_("gather took ... "));
217   switch (TYPEOF(x)) {
218   case LGLSXP: case INTSXP: {
219     const int *restrict thisx = INTEGER(x);
220     #pragma omp parallel for num_threads(getDTthreads(nBatch, false))
221     for (int b=0; b<nBatch; b++) {
222       int *restrict my_tmpcounts = tmpcounts + omp_get_thread_num()*highSize;
223       memcpy(my_tmpcounts, counts + b*highSize, highSize*sizeof(int));   // original cumulated   // already cumulated for this batch
224       int *restrict my_gx = (int *)gx + b*batchSize;
225       const uint16_t *my_high = high + b*batchSize;
226       const int howMany = b==nBatch-1 ? lastBatchSize : batchSize;
227       bool my_anyNA = false;
228       if (irowslen==-1) {
229         const int *my_x = thisx + b*batchSize;
230         for (int i=0; i<howMany; i++) {
231           const int elem = my_x[i];
232           my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
233           if (elem==NA_INTEGER) my_anyNA = true;
234         }
235       } else {
236         const int *my_x = irows + b*batchSize;
237         for (int i=0; i<howMany; i++) {
238           int elem = thisx[ my_x[i]-1 ];
239           my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
240           if (elem==NA_INTEGER) my_anyNA = true;
241         }
242       }
243       if (my_anyNA) *anyNA = true;  // naked write ok since just bool and always writing true; and no performance issue as maximum nBatch writes
244     }
245   } break;
246   case REALSXP: {
247     if (!INHERITS(x, char_integer64)) {
248       const double *restrict thisx = REAL(x);
249       #pragma omp parallel for num_threads(getDTthreads(nBatch, false))
250       for (int b=0; b<nBatch; b++) {
251         int *restrict my_tmpcounts = tmpcounts + omp_get_thread_num()*highSize;
252         memcpy(my_tmpcounts, counts + b*highSize, highSize*sizeof(int));
253         double *restrict my_gx = (double *)gx + b*batchSize;
254         const uint16_t *my_high = high + b*batchSize;
255         const int howMany = b==nBatch-1 ? lastBatchSize : batchSize;
256         bool my_anyNA = false;
257         if (irowslen==-1) {
258           const double *my_x = thisx + b*batchSize;
259           for (int i=0; i<howMany; i++) {
260             const double elem = my_x[i];
261             my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
262             if (ISNAN(elem)) my_anyNA = true;   // R's ISNAN includes NA; i.e. defined as C isnan with some platform specific differences (perhaps historic)
263           }
264         } else {
265           const int *my_x = irows + b*batchSize;
266           for (int i=0; i<howMany; i++) {
267             double elem = thisx[ my_x[i]-1 ];
268             my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
269             if (ISNAN(elem)) my_anyNA = true;
270           }
271         }
272         if (my_anyNA) *anyNA = true;
273       }
274     } else {
275       const int64_t *restrict thisx = (int64_t *)REAL(x);
276       #pragma omp parallel for num_threads(getDTthreads(nBatch, false))
277       for (int b=0; b<nBatch; b++) {
278         int *restrict my_tmpcounts = tmpcounts + omp_get_thread_num()*highSize;
279         memcpy(my_tmpcounts, counts + b*highSize, highSize*sizeof(int));
280         int64_t *restrict my_gx = (int64_t *)gx + b*batchSize;
281         const uint16_t *my_high = high + b*batchSize;
282         const int howMany = b==nBatch-1 ? lastBatchSize : batchSize;
283         bool my_anyNA = false;
284         if (irowslen==-1) {
285           const int64_t *my_x = thisx + b*batchSize;
286           for (int i=0; i<howMany; i++) {
287             const int64_t elem = my_x[i];
288             my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
289             if (elem==INT64_MIN) my_anyNA = true;
290           }
291         } else {
292           const int *my_x = irows + b*batchSize;
293           for (int i=0; i<howMany; i++) {
294             int64_t elem = thisx[ my_x[i]-1 ];
295             my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
296             if (elem==INT64_MIN) my_anyNA = true;
297           }
298         }
299         if (my_anyNA) *anyNA = true;
300       }
301     }
302   } break;
303   case CPLXSXP: {
304     const Rcomplex *restrict thisx = COMPLEX(x);
305     #pragma omp parallel for num_threads(getDTthreads(nBatch, false))
306     for (int b=0; b<nBatch; b++) {
307       int *restrict my_tmpcounts = tmpcounts + omp_get_thread_num()*highSize;
308       memcpy(my_tmpcounts, counts + b*highSize, highSize*sizeof(int));
309       Rcomplex *restrict my_gx = (Rcomplex *)gx + b*batchSize;
310       const uint16_t *my_high = high + b*batchSize;
311       const int howMany = b==nBatch-1 ? lastBatchSize : batchSize;
312       bool my_anyNA = false;
313       if (irowslen==-1) {
314         const Rcomplex *my_x = thisx + b*batchSize;
315         for (int i=0; i<howMany; i++) {
316           const Rcomplex elem = my_x[i];
317           my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
318           // typically just checking one component would be enough,
319           //   but ?complex suggests there may be some edge cases; better to be safe
320           if (ISNAN(elem.r) && ISNAN(elem.i)) my_anyNA = true;
321         }
322       } else {
323         const int *my_x = irows + b*batchSize;
324         for (int i=0; i<howMany; i++) {
325           Rcomplex elem = thisx[ my_x[i]-1 ];
326           my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
327           if (ISNAN(elem.r) && ISNAN(elem.i)) my_anyNA = true;
328         }
329       }
330       if (my_anyNA) *anyNA = true;  // naked write ok since just bool and always writing true; and no performance issue as maximum nBatch writes
331     }
332   } break;
333   default :
334     error(_("gather implemented for INTSXP, REALSXP, and CPLXSXP but not '%s'"), type2char(TYPEOF(x)));   // # nocov
335   }
336   if (verbose) { Rprintf(_("%.3fs\n"), wallclock()-started); }
337   return gx;
338 }
339 
gsum(SEXP x,SEXP narmArg,SEXP warnOverflowArg)340 SEXP gsum(SEXP x, SEXP narmArg, SEXP warnOverflowArg)
341 {
342   if (!isLogical(narmArg) || LENGTH(narmArg)!=1 || LOGICAL(narmArg)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
343   const bool narm = LOGICAL(narmArg)[0];
344   const bool warnOverflow = LOGICAL(warnOverflowArg)[0];
345   if (inherits(x, "factor")) error(_("sum is not meaningful for factors."));
346   const int n = (irowslen == -1) ? length(x) : irowslen;
347   double started = wallclock();
348   const bool verbose=GetVerbose();
349   if (verbose) Rprintf(_("This gsum took (narm=%s) ... "), narm?"TRUE":"FALSE");
350   if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gsum");
351   bool anyNA=false;
352   SEXP ans;
353   switch(TYPEOF(x)) {
354   case LGLSXP: case INTSXP: {
355     const int *restrict gx = gather(x, &anyNA);
356     ans = PROTECT(allocVector(INTSXP, ngrp));
357     int *restrict ansp = INTEGER(ans);
358     memset(ansp, 0, ngrp*sizeof(int));
359     bool overflow=false;
360     //double started = wallclock();
361     if (!anyNA) {
362       #pragma omp parallel for num_threads(getDTthreads(highSize, false)) //schedule(dynamic,1)
363       for (int h=0; h<highSize; h++) {   // very important that high is first loop here
364         int *restrict _ans = ansp + (h<<shift);
365         for (int b=0; b<nBatch; b++) {
366           const int pos = counts[ b*highSize + h ];
367           const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
368           const int *my_gx = gx + b*batchSize + pos;
369           const uint16_t *my_low = low + b*batchSize + pos;
370           for (int i=0; i<howMany; i++) {
371             const int a = _ans[my_low[i]];
372             const int b = my_gx[i];
373             if ((a>0 && b>INT_MAX-a) || (a<0 && b<NA_INTEGER+1-a)) overflow=true;
374             else _ans[my_low[i]] += b;  // naked by design; each thread does all of each h for all batches
375           }
376         }
377       }
378     } else {
379       #pragma omp parallel for num_threads(getDTthreads(highSize, false))
380       for (int h=0; h<highSize; h++) {
381         int *restrict _ans = ansp + (h<<shift);
382         for (int b=0; b<nBatch; b++) {
383           const int pos = counts[ b*highSize + h ];
384           const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
385           const int *my_gx = gx + b*batchSize + pos;
386           const uint16_t *my_low = low + b*batchSize + pos;
387           for (int i=0; i<howMany; i++) {
388             const int a = _ans[my_low[i]];
389             if (a==NA_INTEGER) continue;
390             const int b = my_gx[i];
391             if (b==NA_INTEGER) {
392               if (!narm) _ans[my_low[i]]=NA_INTEGER;
393               continue;
394             }
395             if ((a>0 && b>INT_MAX-a) || (a<0 && b<NA_INTEGER+1-a)) overflow=true;
396             else _ans[my_low[i]] += b;
397           }
398         }
399       }
400     }
401     //Rprintf(_("gsum int took %.3f\n"), wallclock()-started);
402     if (overflow) {
403       UNPROTECT(1); // discard the result with overflow
404       if (warnOverflow) warning(_("The sum of an integer column for a group was more than type 'integer' can hold so the result has been coerced to 'numeric' automatically for convenience."));
405       ans = PROTECT(allocVector(REALSXP, ngrp));
406       double *restrict ansp = REAL(ans);
407       memset(ansp, 0, ngrp*sizeof(double));
408       #pragma omp parallel for num_threads(getDTthreads(highSize, false))
409       for (int h=0; h<highSize; h++) {
410         double *restrict _ans = ansp + (h<<shift);
411         for (int b=0; b<nBatch; b++) {
412           const int pos = counts[ b*highSize + h ];
413           const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
414           const int *my_gx = gx + b*batchSize + pos;
415           const uint16_t *my_low = low + b*batchSize + pos;
416           // rare and slower so no need to switch on anyNA
417           for (int i=0; i<howMany; i++) {
418             const int elem = my_gx[i];
419             if (elem==NA_INTEGER) {
420               if (!narm) _ans[my_low[i]]=NA_REAL;
421               continue;
422             }
423             _ans[my_low[i]] += elem;  // let NA_REAL propagate
424           }
425         }
426       }
427     }
428   } break;
429   case REALSXP: {
430     if (!INHERITS(x, char_integer64)) {
431       const double *restrict gx = gather(x, &anyNA);
432       ans = PROTECT(allocVector(REALSXP, ngrp));
433       double *restrict ansp = REAL(ans);
434       memset(ansp, 0, ngrp*sizeof(double));
435       if (!narm || !anyNA) {
436         #pragma omp parallel for num_threads(getDTthreads(highSize, false))
437         for (int h=0; h<highSize; h++) {
438           double *restrict _ans = ansp + (h<<shift);
439           for (int b=0; b<nBatch; b++) {
440             const int pos = counts[ b*highSize + h ];
441             const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
442             const double *my_gx = gx + b*batchSize + pos;
443             const uint16_t *my_low = low + b*batchSize + pos;
444             for (int i=0; i<howMany; i++) {
445               _ans[my_low[i]] += my_gx[i];  // let NA propagate when !narm
446             }
447           }
448         }
449       } else {
450         // narm==true and anyNA==true
451         #pragma omp parallel for num_threads(getDTthreads(highSize, false))
452         for (int h=0; h<highSize; h++) {
453           double *restrict _ans = ansp + (h<<shift);
454           for (int b=0; b<nBatch; b++) {
455             const int pos = counts[ b*highSize + h ];
456             const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
457             const double *my_gx = gx + b*batchSize + pos;
458             const uint16_t *my_low = low + b*batchSize + pos;
459             for (int i=0; i<howMany; i++) {
460               const double elem = my_gx[i];
461               if (!ISNAN(elem)) _ans[my_low[i]] += elem;
462             }
463           }
464         }
465       }
466     } else { // int64
467       const int64_t *restrict gx = gather(x, &anyNA);
468       ans = PROTECT(allocVector(REALSXP, ngrp));
469       int64_t *restrict ansp = (int64_t *)REAL(ans);
470       memset(ansp, 0, ngrp*sizeof(int64_t));
471       if (!anyNA) {
472         #pragma omp parallel for num_threads(getDTthreads(highSize, false))
473         for (int h=0; h<highSize; h++) {
474           int64_t *restrict _ans = ansp + (h<<shift);
475           for (int b=0; b<nBatch; b++) {
476             const int pos = counts[ b*highSize + h ];
477             const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
478             const int64_t *my_gx = gx + b*batchSize + pos;
479             const uint16_t *my_low = low + b*batchSize + pos;
480             for (int i=0; i<howMany; i++) {
481               _ans[my_low[i]] += my_gx[i]; // does not propagate INT64 for !narm
482             }
483           }
484         }
485       } else { // narm==true/false and anyNA==true
486         if (!narm) {
487           #pragma omp parallel for num_threads(getDTthreads(highSize, false))
488           for (int h=0; h<highSize; h++) {
489             int64_t *restrict _ans = ansp + (h<<shift);
490             for (int b=0; b<nBatch; b++) {
491               const int pos = counts[ b*highSize + h ];
492               const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
493               const int64_t *my_gx = gx + b*batchSize + pos;
494               const uint16_t *my_low = low + b*batchSize + pos;
495               for (int i=0; i<howMany; i++) {
496                 const int64_t elem = my_gx[i];
497                 if (elem!=INT64_MIN) {
498                   _ans[my_low[i]] += elem;
499                 } else {
500                   _ans[my_low[i]] = INT64_MIN;
501                   break;
502                 }
503               }
504             }
505           }
506         } else {
507           #pragma omp parallel for num_threads(getDTthreads(highSize, false))
508           for (int h=0; h<highSize; h++) {
509             int64_t *restrict _ans = ansp + (h<<shift);
510             for (int b=0; b<nBatch; b++) {
511               const int pos = counts[ b*highSize + h ];
512               const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
513               const int64_t *my_gx = gx + b*batchSize + pos;
514               const uint16_t *my_low = low + b*batchSize + pos;
515               for (int i=0; i<howMany; i++) {
516                 const int64_t elem = my_gx[i];
517                 if (elem!=INT64_MIN) _ans[my_low[i]] += elem;
518               }
519             }
520           }
521         }
522       }
523     }
524   } break;
525   case CPLXSXP: {
526     const Rcomplex *restrict gx = gather(x, &anyNA);
527     ans = PROTECT(allocVector(CPLXSXP, ngrp));
528     Rcomplex *restrict ansp = COMPLEX(ans);
529     memset(ansp, 0, ngrp*sizeof(Rcomplex));
530     if (!narm || !anyNA) {
531       #pragma omp parallel for num_threads(getDTthreads(highSize, false))
532       for (int h=0; h<highSize; h++) {
533         Rcomplex *restrict _ans = ansp + (h<<shift);
534         for (int b=0; b<nBatch; b++) {
535           const int pos = counts[ b*highSize + h ];
536           const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
537           const Rcomplex *my_gx = gx + b*batchSize + pos;
538           const uint16_t *my_low = low + b*batchSize + pos;
539           for (int i=0; i<howMany; i++) {
540             _ans[my_low[i]].r += my_gx[i].r;  // let NA propagate when !narm
541             _ans[my_low[i]].i += my_gx[i].i;
542           }
543         }
544       }
545     } else {
546       // narm==true and anyNA==true
547       #pragma omp parallel for num_threads(getDTthreads(highSize, false))
548       for (int h=0; h<highSize; h++) {
549         Rcomplex *restrict _ans = ansp + (h<<shift);
550         for (int b=0; b<nBatch; b++) {
551           const int pos = counts[ b*highSize + h ];
552           const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
553           const Rcomplex *my_gx = gx + b*batchSize + pos;
554           const uint16_t *my_low = low + b*batchSize + pos;
555           for (int i=0; i<howMany; i++) {
556             const Rcomplex elem = my_gx[i];
557             if (!ISNAN(elem.r)) _ans[my_low[i]].r += elem.r;
558             if (!ISNAN(elem.i)) _ans[my_low[i]].i += elem.i;
559           }
560         }
561       }
562     }
563   } break;
564   default:
565     error(_("Type '%s' not supported by GForce sum (gsum). Either add the prefix base::sum(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
566   }
567   copyMostAttrib(x, ans);
568   if (verbose) { Rprintf(_("%.3fs\n"), wallclock()-started); }
569   UNPROTECT(1);
570   return(ans);
571 }
572 
gmean(SEXP x,SEXP narm)573 SEXP gmean(SEXP x, SEXP narm)
574 {
575   SEXP ans=R_NilValue;
576   //clock_t start = clock();
577   if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
578   if (!isVectorAtomic(x)) error(_("GForce mean can only be applied to columns, not .SD or similar. Likely you're looking for 'DT[,lapply(.SD,mean),by=,.SDcols=]'. See ?data.table."));
579   if (inherits(x, "factor")) error(_("mean is not meaningful for factors."));
580   if (!LOGICAL(narm)[0]) {
581     int protecti=0;
582     ans = PROTECT(gsum(x, narm, /*#986, warnOverflow=*/ScalarLogical(FALSE))); protecti++;
583     switch(TYPEOF(ans)) {
584     case LGLSXP: case INTSXP:
585       ans = PROTECT(coerceVector(ans, REALSXP)); protecti++;
586     case REALSXP: {
587       double *xd = REAL(ans);
588       for (int i=0; i<ngrp; i++) *xd++ /= grpsize[i];  // let NA propogate
589     } break;
590     case CPLXSXP: {
591       Rcomplex *xd = COMPLEX(ans);
592       for (int i=0; i<ngrp; i++) {
593         xd->i /= grpsize[i];
594         xd->r /= grpsize[i];
595         xd++;
596       }
597     } break;
598     default :
599       error(_("Internal error: gsum returned type '%s'. typeof(x) is '%s'"), type2char(TYPEOF(ans)), type2char(TYPEOF(x))); // # nocov
600     }
601     UNPROTECT(protecti);
602     return(ans);
603   }
604   // na.rm=TRUE.  Similar to gsum, but we need to count the non-NA as well for the divisor
605   const int n = (irowslen == -1) ? length(x) : irowslen;
606   if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gsum");
607 
608   long double *s = calloc(ngrp, sizeof(long double)), *si=NULL;  // s = sum; si = sum imaginary just for complex
609   if (!s) error(_("Unable to allocate %d * %d bytes for sum in gmean na.rm=TRUE"), ngrp, sizeof(long double));
610 
611   int *c = calloc(ngrp, sizeof(int));
612   if (!c) error(_("Unable to allocate %d * %d bytes for counts in gmean na.rm=TRUE"), ngrp, sizeof(int));
613 
614   switch(TYPEOF(x)) {
615   case LGLSXP: case INTSXP: {
616     const int *xd = INTEGER(x);
617     for (int i=0; i<n; i++) {
618       int thisgrp = grp[i];
619       int ix = (irowslen == -1) ? i : irows[i]-1;
620       if (xd[ix] == NA_INTEGER) continue;
621       s[thisgrp] += xd[ix];  // no under/overflow here, s is long double
622       c[thisgrp]++;
623     }
624   } break;
625   case REALSXP: {
626     const double *xd = REAL(x);
627     for (int i=0; i<n; i++) {
628       int thisgrp = grp[i];
629       int ix = (irowslen == -1) ? i : irows[i]-1;
630       if (ISNAN(xd[ix])) continue;
631       s[thisgrp] += xd[ix];
632       c[thisgrp]++;
633     }
634   } break;
635   case CPLXSXP: {
636     const Rcomplex *xd = COMPLEX(x);
637     si = calloc(ngrp, sizeof(long double));
638     if (!si) error(_("Unable to allocate %d * %d bytes for si in gmean na.rm=TRUE"), ngrp, sizeof(long double));
639     for (int i=0; i<n; i++) {
640       int thisgrp = grp[i];
641       int ix = (irowslen == -1) ? i : irows[i]-1;
642       if (ISNAN(xd[ix].r) || ISNAN(xd[ix].i)) continue;  // || otherwise we'll need two counts in two c's too?
643       s[thisgrp] += xd[ix].r;
644       si[thisgrp] += xd[ix].i;
645       c[thisgrp]++;
646     }
647   } break;
648   default:
649     free(s); free(c); // # nocov because it already stops at gsum, remove nocov if gmean will support a type that gsum wont
650     error(_("Type '%s' not supported by GForce mean (gmean) na.rm=TRUE. Either add the prefix base::mean(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x))); // # nocov
651   }
652   switch(TYPEOF(x)) {
653   case LGLSXP: case INTSXP: case REALSXP: {
654     ans = PROTECT(allocVector(REALSXP, ngrp));
655     double *ansd = REAL(ans);
656     for (int i=0; i<ngrp; i++) {
657       if (c[i]==0) { ansd[i] = R_NaN; continue; }  // NaN to follow base::mean
658       s[i] /= c[i];
659       ansd[i] = s[i]>DBL_MAX ? R_PosInf : (s[i] < -DBL_MAX ? R_NegInf : (double)s[i]);
660     }
661   } break;
662   case CPLXSXP: {
663     ans = PROTECT(allocVector(CPLXSXP, ngrp));
664     Rcomplex *ansd = COMPLEX(ans);
665     for (int i=0; i<ngrp; i++) {
666       if (c[i]==0) { ansd[i].r = R_NaN; ansd[i].i = R_NaN; continue; }
667       s[i] /= c[i];
668       si[i] /= c[i];
669       ansd[i].r = s[i] >DBL_MAX ? R_PosInf : (s[i] < -DBL_MAX ? R_NegInf : (double)s[i]);
670       ansd[i].i = si[i]>DBL_MAX ? R_PosInf : (si[i]< -DBL_MAX ? R_NegInf : (double)si[i]);
671     }
672   } break;
673   default:
674     error(_("Internal error: unsupported type at the end of gmean")); // # nocov
675   }
676   free(s); free(si); free(c);
677   copyMostAttrib(x, ans);
678   // Rprintf(_("this gmean na.rm=TRUE took %8.3f\n"), 1.0*(clock()-start)/CLOCKS_PER_SEC);
679   UNPROTECT(1);
680   return(ans);
681 }
682 
683 // gmin
gmin(SEXP x,SEXP narm)684 SEXP gmin(SEXP x, SEXP narm)
685 {
686   if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
687   if (!isVectorAtomic(x)) error(_("GForce min can only be applied to columns, not .SD or similar. To find min of all items in a list such as .SD, either add the prefix base::min(.SD) or turn off GForce optimization using options(datatable.optimize=1). More likely, you may be looking for 'DT[,lapply(.SD,min),by=,.SDcols=]'"));
688   if (inherits(x, "factor") && !inherits(x, "ordered")) error(_("min is not meaningful for factors."));
689   R_len_t i, ix, thisgrp=0;
690   int n = (irowslen == -1) ? length(x) : irowslen;
691   //clock_t start = clock();
692   SEXP ans;
693   if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmin");
694   int protecti=0;
695   switch(TYPEOF(x)) {
696   case LGLSXP: case INTSXP:
697     ans = PROTECT(allocVector(INTSXP, ngrp)); protecti++;
698     if (!LOGICAL(narm)[0]) {
699       for (i=0; i<ngrp; i++) INTEGER(ans)[i] = INT_MAX;
700       for (i=0; i<n; i++) {
701         thisgrp = grp[i];
702         ix = (irowslen == -1) ? i : irows[i]-1;
703         if (INTEGER(x)[ix] < INTEGER(ans)[thisgrp])   // NA_INTEGER==INT_MIN checked in init.c
704           INTEGER(ans)[thisgrp] = INTEGER(x)[ix];
705       }
706     } else {
707       for (i=0; i<ngrp; i++) INTEGER(ans)[i] = NA_INTEGER;
708       for (i=0; i<n; i++) {
709         thisgrp = grp[i];
710         ix = (irowslen == -1) ? i : irows[i]-1;
711         if (INTEGER(x)[ix] == NA_INTEGER) continue;
712         if (INTEGER(ans)[thisgrp] == NA_INTEGER || INTEGER(x)[ix] < INTEGER(ans)[thisgrp])
713           INTEGER(ans)[thisgrp] = INTEGER(x)[ix];
714       }
715       for (i=0; i<ngrp; i++) {
716         if (INTEGER(ans)[i] == NA_INTEGER) {
717           warning(_("No non-missing values found in at least one group. Coercing to numeric type and returning 'Inf' for such groups to be consistent with base"));
718           ans = PROTECT(coerceVector(ans, REALSXP)); protecti++;
719           for (i=0; i<ngrp; i++) {
720             if (ISNA(REAL(ans)[i])) REAL(ans)[i] = R_PosInf;
721           }
722           break;
723         }
724       }
725     }
726     break;
727   case STRSXP:
728     ans = PROTECT(allocVector(STRSXP, ngrp)); protecti++;
729     if (!LOGICAL(narm)[0]) {
730       for (i=0; i<ngrp; i++) SET_STRING_ELT(ans, i, R_BlankString);
731       for (i=0; i<n; i++) {
732         thisgrp = grp[i];
733         ix = (irowslen == -1) ? i : irows[i]-1;
734         if (STRING_ELT(x, ix) == NA_STRING) {
735           SET_STRING_ELT(ans, thisgrp, NA_STRING);
736         } else {
737           if (STRING_ELT(ans, thisgrp) == R_BlankString ||
738             (STRING_ELT(ans, thisgrp) != NA_STRING && strcmp(CHAR(STRING_ELT(x, ix)), CHAR(STRING_ELT(ans, thisgrp))) < 0 )) {
739             SET_STRING_ELT(ans, thisgrp, STRING_ELT(x, ix));
740           }
741         }
742       }
743     } else {
744       for (i=0; i<ngrp; i++) SET_STRING_ELT(ans, i, NA_STRING);
745       for (i=0; i<n; i++) {
746         thisgrp = grp[i];
747         ix = (irowslen == -1) ? i : irows[i]-1;
748         if (STRING_ELT(x, ix) == NA_STRING) continue;
749         if (STRING_ELT(ans, thisgrp) == NA_STRING ||
750           strcmp(CHAR(STRING_ELT(x, ix)), CHAR(STRING_ELT(ans, thisgrp))) < 0) {
751           SET_STRING_ELT(ans, thisgrp, STRING_ELT(x, ix));
752         }
753       }
754       for (i=0; i<ngrp; i++) {
755         if (STRING_ELT(ans, i)==NA_STRING) {
756           warning(_("No non-missing values found in at least one group. Returning 'NA' for such groups to be consistent with base"));
757           break;
758         }
759       }
760     }
761     break;
762   case REALSXP:
763     ans = PROTECT(allocVector(REALSXP, ngrp)); protecti++;
764     if (!LOGICAL(narm)[0]) {
765       for (i=0; i<ngrp; i++) REAL(ans)[i] = R_PosInf;
766       for (i=0; i<n; i++) {
767         thisgrp = grp[i];
768         ix = (irowslen == -1) ? i : irows[i]-1;
769         if (ISNAN(REAL(x)[ix]) || REAL(x)[ix] < REAL(ans)[thisgrp])
770           REAL(ans)[thisgrp] = REAL(x)[ix];
771       }
772     } else {
773       for (i=0; i<ngrp; i++) REAL(ans)[i] = NA_REAL;
774       for (i=0; i<n; i++) {
775         thisgrp = grp[i];
776         ix = (irowslen == -1) ? i : irows[i]-1;
777         if (ISNAN(REAL(x)[ix])) continue;
778         if (ISNAN(REAL(ans)[thisgrp]) || REAL(x)[ix] < REAL(ans)[thisgrp])
779           REAL(ans)[thisgrp] = REAL(x)[ix];
780       }
781       for (i=0; i<ngrp; i++) {
782         if (ISNAN(REAL(ans)[i])) {
783           warning(_("No non-missing values found in at least one group. Returning 'Inf' for such groups to be consistent with base"));
784           for (; i<ngrp; i++) if (ISNAN(REAL(ans)[i])) REAL(ans)[i] = R_PosInf;
785           break;
786         }
787       }
788     }
789     break;
790   case CPLXSXP:
791     error(_("Type 'complex' has no well-defined min"));
792     break;
793   default:
794     error(_("Type '%s' not supported by GForce min (gmin). Either add the prefix base::min(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
795   }
796   copyMostAttrib(x, ans); // all but names,dim and dimnames. And if so, we want a copy here, not keepattr's SET_ATTRIB.
797   UNPROTECT(protecti);  // ans + maybe 1 coerced ans
798   // Rprintf(_("this gmin took %8.3f\n"), 1.0*(clock()-start)/CLOCKS_PER_SEC);
799   return(ans);
800 }
801 
802 // gmax
gmax(SEXP x,SEXP narm)803 SEXP gmax(SEXP x, SEXP narm)
804 {
805   if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
806   if (!isVectorAtomic(x)) error(_("GForce max can only be applied to columns, not .SD or similar. To find max of all items in a list such as .SD, either add the prefix base::max(.SD) or turn off GForce optimization using options(datatable.optimize=1). More likely, you may be looking for 'DT[,lapply(.SD,max),by=,.SDcols=]'"));
807   if (inherits(x, "factor") && !inherits(x, "ordered")) error(_("max is not meaningful for factors."));
808   R_len_t i, ix, thisgrp=0;
809   int n = (irowslen == -1) ? length(x) : irowslen;
810   //clock_t start = clock();
811   SEXP ans;
812   if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmax");
813 
814   // TODO rework gmax in the same way as gmin and remove this *update
815   char *update = (char *)R_alloc(ngrp, sizeof(char));
816   for (int i=0; i<ngrp; i++) update[i] = 0;
817   int protecti=0;
818   switch(TYPEOF(x)) {
819   case LGLSXP: case INTSXP:
820     ans = PROTECT(allocVector(INTSXP, ngrp)); protecti++;
821     for (i=0; i<ngrp; i++) INTEGER(ans)[i] = 0;
822     if (!LOGICAL(narm)[0]) { // simple case - deal in a straightforward manner first
823       for (i=0; i<n; i++) {
824         thisgrp = grp[i];
825         ix = (irowslen == -1) ? i : irows[i]-1;
826         if (INTEGER(x)[ix] != NA_INTEGER && INTEGER(ans)[thisgrp] != NA_INTEGER) {
827           if ( update[thisgrp] != 1 || INTEGER(ans)[thisgrp] < INTEGER(x)[ix] ) {
828             INTEGER(ans)[thisgrp] = INTEGER(x)[ix];
829             if (update[thisgrp] != 1) update[thisgrp] = 1;
830           }
831         } else  INTEGER(ans)[thisgrp] = NA_INTEGER;
832       }
833     } else {
834       for (i=0; i<n; i++) {
835         thisgrp = grp[i];
836         ix = (irowslen == -1) ? i : irows[i]-1;
837         if (INTEGER(x)[ix] != NA_INTEGER) {
838           if ( update[thisgrp] != 1 || INTEGER(ans)[thisgrp] < INTEGER(x)[ix] ) {
839             INTEGER(ans)[thisgrp] = INTEGER(x)[ix];
840             if (update[thisgrp] != 1) update[thisgrp] = 1;
841           }
842         } else {
843           if (update[thisgrp] != 1) {
844             INTEGER(ans)[thisgrp] = NA_INTEGER;
845           }
846         }
847       }
848       for (i=0; i<ngrp; i++) {
849         if (update[i] != 1)  {// equivalent of INTEGER(ans)[thisgrp] == NA_INTEGER
850           warning(_("No non-missing values found in at least one group. Coercing to numeric type and returning 'Inf' for such groups to be consistent with base"));
851           ans = PROTECT(coerceVector(ans, REALSXP)); protecti++;
852           for (i=0; i<ngrp; i++) {
853             if (update[i] != 1) REAL(ans)[i] = -R_PosInf;
854           }
855           break;
856         }
857       }
858     }
859     break;
860   case STRSXP:
861     ans = PROTECT(allocVector(STRSXP, ngrp)); protecti++;
862     for (i=0; i<ngrp; i++) SET_STRING_ELT(ans, i, mkChar(""));
863     if (!LOGICAL(narm)[0]) { // simple case - deal in a straightforward manner first
864       for (i=0; i<n; i++) {
865         thisgrp = grp[i];
866         ix = (irowslen == -1) ? i : irows[i]-1;
867         if (STRING_ELT(x,ix) != NA_STRING && STRING_ELT(ans, thisgrp) != NA_STRING) {
868           if ( update[thisgrp] != 1 || strcmp(CHAR(STRING_ELT(ans, thisgrp)), CHAR(STRING_ELT(x,ix))) < 0 ) {
869             SET_STRING_ELT(ans, thisgrp, STRING_ELT(x, ix));
870             if (update[thisgrp] != 1) update[thisgrp] = 1;
871           }
872         } else  SET_STRING_ELT(ans, thisgrp, NA_STRING);
873       }
874     } else {
875       for (i=0; i<n; i++) {
876         thisgrp = grp[i];
877         ix = (irowslen == -1) ? i : irows[i]-1;
878         if (STRING_ELT(x, ix) != NA_STRING) {
879           if ( update[thisgrp] != 1 || strcmp(CHAR(STRING_ELT(ans, thisgrp)), CHAR(STRING_ELT(x, ix))) < 0 ) {
880             SET_STRING_ELT(ans, thisgrp, STRING_ELT(x, ix));
881             if (update[thisgrp] != 1) update[thisgrp] = 1;
882           }
883         } else {
884           if (update[thisgrp] != 1) {
885             SET_STRING_ELT(ans, thisgrp, NA_STRING);
886           }
887         }
888       }
889       for (i=0; i<ngrp; i++) {
890         if (update[i] != 1)  {// equivalent of INTEGER(ans)[thisgrp] == NA_INTEGER
891           warning(_("No non-missing values found in at least one group. Returning 'NA' for such groups to be consistent with base"));
892           break;
893         }
894       }
895     }
896     break;
897   case REALSXP:
898     ans = PROTECT(allocVector(REALSXP, ngrp)); protecti++;
899     for (i=0; i<ngrp; i++) REAL(ans)[i] = 0;
900     if (!LOGICAL(narm)[0]) {
901       for (i=0; i<n; i++) {
902         thisgrp = grp[i];
903         ix = (irowslen == -1) ? i : irows[i]-1;
904         if ( !ISNA(REAL(x)[ix]) && !ISNA(REAL(ans)[thisgrp]) ) {
905           if ( update[thisgrp] != 1 || REAL(ans)[thisgrp] < REAL(x)[ix] ||
906              (ISNAN(REAL(x)[ix]) && !ISNAN(REAL(ans)[thisgrp])) ) { // #1461
907             REAL(ans)[thisgrp] = REAL(x)[ix];
908             if (update[thisgrp] != 1) update[thisgrp] = 1;
909           }
910         } else REAL(ans)[thisgrp] = NA_REAL;
911       }
912     } else {
913       for (i=0; i<n; i++) {
914         thisgrp = grp[i];
915         ix = (irowslen == -1) ? i : irows[i]-1;
916         if ( !ISNAN(REAL(x)[ix]) ) { // #1461
917           if ( update[thisgrp] != 1 || REAL(ans)[thisgrp] < REAL(x)[ix] ) {
918             REAL(ans)[thisgrp] = REAL(x)[ix];
919             if (update[thisgrp] != 1) update[thisgrp] = 1;
920           }
921         } else {
922           if (update[thisgrp] != 1) {
923             REAL(ans)[thisgrp] = -R_PosInf;
924           }
925         }
926       }
927       // everything taken care of already. Just warn if all NA groups have occurred at least once
928       for (i=0; i<ngrp; i++) {
929         if (update[i] != 1)  { // equivalent of REAL(ans)[thisgrp] == -R_PosInf
930           warning(_("No non-missing values found in at least one group. Returning '-Inf' for such groups to be consistent with base"));
931           break;
932         }
933       }
934     }
935     break;
936   case CPLXSXP:
937     error(_("Type 'complex' has no well-defined max"));
938     break;
939   default:
940     error(_("Type '%s' not supported by GForce max (gmax). Either add the prefix base::max(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
941   }
942   copyMostAttrib(x, ans); // all but names,dim and dimnames. And if so, we want a copy here, not keepattr's SET_ATTRIB.
943   UNPROTECT(protecti);
944   // Rprintf(_("this gmax took %8.3f\n"), 1.0*(clock()-start)/CLOCKS_PER_SEC);
945   return(ans);
946 }
947 
948 // gmedian, always returns numeric type (to avoid as.numeric() wrap..)
gmedian(SEXP x,SEXP narmArg)949 SEXP gmedian(SEXP x, SEXP narmArg) {
950   if (!isLogical(narmArg) || LENGTH(narmArg)!=1 || LOGICAL(narmArg)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
951   if (!isVectorAtomic(x)) error(_("GForce median can only be applied to columns, not .SD or similar. To find median of all items in a list such as .SD, either add the prefix stats::median(.SD) or turn off GForce optimization using options(datatable.optimize=1). More likely, you may be looking for 'DT[,lapply(.SD,median),by=,.SDcols=]'"));
952   if (inherits(x, "factor")) error(_("median is not meaningful for factors."));
953   const bool isInt64 = INHERITS(x, char_integer64), narm = LOGICAL(narmArg)[0];
954   int n = (irowslen == -1) ? length(x) : irowslen;
955   if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmedian");
956   SEXP ans = PROTECT(allocVector(REALSXP, ngrp));
957   double *ansd = REAL(ans);
958   switch(TYPEOF(x)) {
959   case REALSXP: {
960     double *subd = REAL(PROTECT(allocVector(REALSXP, maxgrpn))); // allocate once upfront and reuse
961     int64_t *xi64 = (int64_t *)REAL(x);
962     double  *xd = REAL(x);
963     for (int i=0; i<ngrp; ++i) {
964       int thisgrpsize = grpsize[i], nacount=0;
965       for (int j=0; j<thisgrpsize; ++j) {
966         int k = ff[i]+j-1;
967         if (isunsorted) k = oo[k]-1;
968         k = (irowslen == -1) ? k : irows[k]-1;
969         if (isInt64 ? xi64[k]==NA_INTEGER64 : ISNAN(xd[k])) nacount++;
970         else subd[j-nacount] = xd[k];
971       }
972       thisgrpsize -= nacount;  // all-NA is returned as NA_REAL via n==0 case inside *quickselect
973       ansd[i] = (nacount && !narm) ? NA_REAL : (isInt64 ? i64quickselect((void *)subd, thisgrpsize) : dquickselect(subd, thisgrpsize));
974     }}
975     break;
976   case LGLSXP: case INTSXP: {
977     int *subi = INTEGER(PROTECT(allocVector(INTSXP, maxgrpn)));
978     int *xi = INTEGER(x);
979     for (int i=0; i<ngrp; i++) {
980       int thisgrpsize = grpsize[i], nacount=0;
981       for (int j=0; j<thisgrpsize; ++j) {
982         int k = ff[i]+j-1;
983         if (isunsorted) k = oo[k]-1;
984         k = (irowslen == -1) ? k : irows[k]-1;
985         if (xi[k]==NA_INTEGER) nacount++;
986         else subi[j-nacount] = xi[k];
987       }
988       ansd[i] = (nacount && !narm) ? NA_REAL : iquickselect(subi, thisgrpsize-nacount);
989     }}
990     break;
991   default:
992     error(_("Type '%s' not supported by GForce median (gmedian). Either add the prefix stats::median(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
993   }
994   if (!isInt64) copyMostAttrib(x, ans);
995   // else the integer64 class needs to be dropped since double is always returned by gmedian
996   UNPROTECT(2);  // ans, subd|subi
997   return ans;
998 }
999 
glast(SEXP x)1000 SEXP glast(SEXP x) {
1001 
1002   R_len_t i,k;
1003   int n = (irowslen == -1) ? length(x) : irowslen;
1004   SEXP ans;
1005   if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gtail");
1006   switch(TYPEOF(x)) {
1007   case LGLSXP: {
1008     const int *ix = LOGICAL(x);
1009     ans = PROTECT(allocVector(LGLSXP, ngrp));
1010     int *ians = LOGICAL(ans);
1011     for (i=0; i<ngrp; i++) {
1012       k = ff[i]+grpsize[i]-2;
1013       if (isunsorted) k = oo[k]-1;
1014       k = (irowslen == -1) ? k : irows[k]-1;
1015       ians[i] = ix[k];
1016     }
1017   }
1018     break;
1019   case INTSXP: {
1020     const int *ix = INTEGER(x);
1021     ans = PROTECT(allocVector(INTSXP, ngrp));
1022     int *ians = INTEGER(ans);
1023     for (i=0; i<ngrp; i++) {
1024       k = ff[i]+grpsize[i]-2;
1025       if (isunsorted) k = oo[k]-1;
1026       k = (irowslen == -1) ? k : irows[k]-1;
1027       ians[i] = ix[k];
1028     }
1029   }
1030     break;
1031   case REALSXP: {
1032     const double *dx = REAL(x);
1033     ans = PROTECT(allocVector(REALSXP, ngrp));
1034     double *dans = REAL(ans);
1035     for (i=0; i<ngrp; i++) {
1036       k = ff[i]+grpsize[i]-2;
1037       if (isunsorted) k = oo[k]-1;
1038       k = (irowslen == -1) ? k : irows[k]-1;
1039       dans[i] = dx[k];
1040     }
1041   }
1042     break;
1043   case CPLXSXP: {
1044     const Rcomplex *dx = COMPLEX(x);
1045     ans = PROTECT(allocVector(CPLXSXP, ngrp));
1046     Rcomplex *dans = COMPLEX(ans);
1047     for (i=0; i<ngrp; i++) {
1048       k = ff[i]+grpsize[i]-2;
1049       if (isunsorted) k = oo[k]-1;
1050       k = (irowslen == -1) ? k : irows[k]-1;
1051       dans[i] = dx[k];
1052     }
1053   } break;
1054   case STRSXP:
1055     ans = PROTECT(allocVector(STRSXP, ngrp));
1056     for (i=0; i<ngrp; i++) {
1057       k = ff[i]+grpsize[i]-2;
1058       if (isunsorted) k = oo[k]-1;
1059       k = (irowslen == -1) ? k : irows[k]-1;
1060       SET_STRING_ELT(ans, i, STRING_ELT(x, k));
1061     }
1062     break;
1063   case VECSXP:
1064     ans = PROTECT(allocVector(VECSXP, ngrp));
1065     for (i=0; i<ngrp; i++) {
1066       k = ff[i]+grpsize[i]-2;
1067       if (isunsorted) k = oo[k]-1;
1068       k = (irowslen == -1) ? k : irows[k]-1;
1069       SET_VECTOR_ELT(ans, i, VECTOR_ELT(x, k));
1070     }
1071     break;
1072   default:
1073     error(_("Type '%s' not supported by GForce tail (gtail). Either add the prefix utils::tail(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
1074   }
1075   copyMostAttrib(x, ans);
1076   UNPROTECT(1);
1077   return(ans);
1078 }
1079 
gfirst(SEXP x)1080 SEXP gfirst(SEXP x) {
1081 
1082   R_len_t i,k;
1083   int n = (irowslen == -1) ? length(x) : irowslen;
1084   SEXP ans;
1085   if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "ghead");
1086   switch(TYPEOF(x)) {
1087   case LGLSXP: {
1088     int const *ix = LOGICAL(x);
1089     ans = PROTECT(allocVector(LGLSXP, ngrp));
1090     int *ians = LOGICAL(ans);
1091     for (i=0; i<ngrp; i++) {
1092       k = ff[i]-1;
1093       if (isunsorted) k = oo[k]-1;
1094       k = (irowslen == -1) ? k : irows[k]-1;
1095       ians[i] = ix[k];
1096     }
1097   }
1098     break;
1099   case INTSXP: {
1100     const int *ix = INTEGER(x);
1101     ans = PROTECT(allocVector(INTSXP, ngrp));
1102     int *ians = INTEGER(ans);
1103     for (i=0; i<ngrp; i++) {
1104       k = ff[i]-1;
1105       if (isunsorted) k = oo[k]-1;
1106       k = (irowslen == -1) ? k : irows[k]-1;
1107       ians[i] = ix[k];
1108     }
1109   }
1110     break;
1111   case REALSXP: {
1112     const double *dx = REAL(x);
1113     ans = PROTECT(allocVector(REALSXP, ngrp));
1114     double *dans = REAL(ans);
1115     for (i=0; i<ngrp; i++) {
1116       k = ff[i]-1;
1117       if (isunsorted) k = oo[k]-1;
1118       k = (irowslen == -1) ? k : irows[k]-1;
1119       dans[i] = dx[k];
1120     }
1121   }
1122     break;
1123   case CPLXSXP: {
1124     const Rcomplex *dx = COMPLEX(x);
1125     ans = PROTECT(allocVector(CPLXSXP, ngrp));
1126     Rcomplex *dans = COMPLEX(ans);
1127     for (i=0; i<ngrp; i++) {
1128       k = ff[i]-1;
1129       if (isunsorted) k = oo[k]-1;
1130       k = (irowslen == -1) ? k : irows[k]-1;
1131       dans[i] = dx[k];
1132     }
1133   } break;
1134   case STRSXP:
1135     ans = PROTECT(allocVector(STRSXP, ngrp));
1136     for (i=0; i<ngrp; i++) {
1137       k = ff[i]-1;
1138       if (isunsorted) k = oo[k]-1;
1139       k = (irowslen == -1) ? k : irows[k]-1;
1140       SET_STRING_ELT(ans, i, STRING_ELT(x, k));
1141     }
1142     break;
1143   case VECSXP:
1144     ans = PROTECT(allocVector(VECSXP, ngrp));
1145     for (i=0; i<ngrp; i++) {
1146       k = ff[i]-1;
1147       if (isunsorted) k = oo[k]-1;
1148       k = (irowslen == -1) ? k : irows[k]-1;
1149       SET_VECTOR_ELT(ans, i, VECTOR_ELT(x, k));
1150     }
1151     break;
1152   default:
1153     error(_("Type '%s' not supported by GForce head (ghead). Either add the prefix utils::head(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
1154   }
1155   copyMostAttrib(x, ans);
1156   UNPROTECT(1);
1157   return(ans);
1158 }
1159 
gtail(SEXP x,SEXP valArg)1160 SEXP gtail(SEXP x, SEXP valArg) {
1161   if (!isInteger(valArg) || LENGTH(valArg)!=1 || INTEGER(valArg)[0]!=1) error(_("Internal error, gtail is only implemented for n=1. This should have been caught before. please report to data.table issue tracker.")); // # nocov
1162   return (glast(x));
1163 }
1164 
ghead(SEXP x,SEXP valArg)1165 SEXP ghead(SEXP x, SEXP valArg) {
1166   if (!isInteger(valArg) || LENGTH(valArg)!=1 || INTEGER(valArg)[0]!=1) error(_("Internal error, ghead is only implemented for n=1. This should have been caught before. please report to data.table issue tracker.")); // # nocov
1167   return (gfirst(x));
1168 }
1169 
gnthvalue(SEXP x,SEXP valArg)1170 SEXP gnthvalue(SEXP x, SEXP valArg) {
1171 
1172   if (!isInteger(valArg) || LENGTH(valArg)!=1 || INTEGER(valArg)[0]<=0) error(_("Internal error, `g[` (gnthvalue) is only implemented single value subsets with positive index, e.g., .SD[2]. This should have been caught before. please report to data.table issue tracker.")); // # nocov
1173   R_len_t i,k, val=INTEGER(valArg)[0];
1174   int n = (irowslen == -1) ? length(x) : irowslen;
1175   SEXP ans;
1176   if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "ghead");
1177   switch(TYPEOF(x)) {
1178   case LGLSXP: {
1179     const int *ix = LOGICAL(x);
1180     ans = PROTECT(allocVector(LGLSXP, ngrp));
1181     int *ians = LOGICAL(ans);
1182     for (i=0; i<ngrp; i++) {
1183       if (val > grpsize[i]) { LOGICAL(ans)[i] = NA_LOGICAL; continue; }
1184       k = ff[i]+val-2;
1185       if (isunsorted) k = oo[k]-1;
1186       k = (irowslen == -1) ? k : irows[k]-1;
1187       ians[i] = ix[k];
1188     }
1189   }
1190     break;
1191   case INTSXP: {
1192     const int *ix = INTEGER(x);
1193     ans = PROTECT(allocVector(INTSXP, ngrp));
1194     int *ians = INTEGER(ans);
1195     for (i=0; i<ngrp; i++) {
1196       if (val > grpsize[i]) { INTEGER(ans)[i] = NA_INTEGER; continue; }
1197       k = ff[i]+val-2;
1198       if (isunsorted) k = oo[k]-1;
1199       k = (irowslen == -1) ? k : irows[k]-1;
1200       ians[i] = ix[k];
1201     }
1202   }
1203     break;
1204   case REALSXP: {
1205     const double *dx = REAL(x);
1206     ans = PROTECT(allocVector(REALSXP, ngrp));
1207     double *dans = REAL(ans);
1208     for (i=0; i<ngrp; i++) {
1209       if (val > grpsize[i]) { REAL(ans)[i] = NA_REAL; continue; }
1210       k = ff[i]+val-2;
1211       if (isunsorted) k = oo[k]-1;
1212       k = (irowslen == -1) ? k : irows[k]-1;
1213       dans[i] = dx[k];
1214     }
1215   }
1216     break;
1217   case CPLXSXP: {
1218     const Rcomplex *dx = COMPLEX(x);
1219     ans = PROTECT(allocVector(CPLXSXP, ngrp));
1220     Rcomplex *dans = COMPLEX(ans);
1221     for (i=0; i<ngrp; i++) {
1222       if (val > grpsize[i]) { dans[i].r = NA_REAL; dans[i].i = NA_REAL; continue; }
1223       k = ff[i]+val-2;
1224       if (isunsorted) k = oo[k]-1;
1225       k = (irowslen == -1) ? k : irows[k]-1;
1226       dans[i] = dx[k];
1227     }
1228   } break;
1229   case STRSXP:
1230     ans = PROTECT(allocVector(STRSXP, ngrp));
1231     for (i=0; i<ngrp; i++) {
1232       if (val > grpsize[i]) { SET_STRING_ELT(ans, i, NA_STRING); continue; }
1233       k = ff[i]+val-2;
1234       if (isunsorted) k = oo[k]-1;
1235       k = (irowslen == -1) ? k : irows[k]-1;
1236       SET_STRING_ELT(ans, i, STRING_ELT(x, k));
1237     }
1238     break;
1239   case VECSXP:
1240     ans = PROTECT(allocVector(VECSXP, ngrp));
1241     for (i=0; i<ngrp; i++) {
1242       if (val > grpsize[i]) { SET_VECTOR_ELT(ans, i, R_NilValue); continue; }
1243       k = ff[i]+val-2;
1244       if (isunsorted) k = oo[k]-1;
1245       k = (irowslen == -1) ? k : irows[k]-1;
1246       SET_VECTOR_ELT(ans, i, VECTOR_ELT(x, k));
1247     }
1248     break;
1249   default:
1250     error(_("Type '%s' not supported by GForce subset `[` (gnthvalue). Either add the prefix utils::head(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
1251   }
1252   copyMostAttrib(x, ans);
1253   UNPROTECT(1);
1254   return(ans);
1255 }
1256 
1257 // TODO: gwhich.min, gwhich.max
1258 // implemented this similar to gmedian to balance well between speed and memory usage. There's one extra allocation on maximum groups and that's it.. and that helps speed things up extremely since we don't have to collect x's values for each group for each step (mean, residuals, mean again and then variance).
gvarsd1(SEXP x,SEXP narm,Rboolean isSD)1259 SEXP gvarsd1(SEXP x, SEXP narm, Rboolean isSD)
1260 {
1261   if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
1262   if (!isVectorAtomic(x)) error(_("GForce var/sd can only be applied to columns, not .SD or similar. For the full covariance matrix of all items in a list such as .SD, either add the prefix stats::var(.SD) (or stats::sd(.SD)) or turn off GForce optimization using options(datatable.optimize=1). Alternatively, if you only need the diagonal elements, 'DT[,lapply(.SD,var),by=,.SDcols=]' is the optimized way to do this."));
1263   if (inherits(x, "factor")) error(_("var/sd is not meaningful for factors."));
1264   long double m, s, v;
1265   R_len_t i, j, ix, thisgrpsize = 0, n = (irowslen == -1) ? length(x) : irowslen;
1266   if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gvar");
1267   SEXP sub, ans = PROTECT(allocVector(REALSXP, ngrp));
1268   Rboolean ans_na;
1269   switch(TYPEOF(x)) {
1270   case LGLSXP: case INTSXP:
1271     sub = PROTECT(allocVector(INTSXP, maxgrpn)); // allocate once upfront
1272     if (!LOGICAL(narm)[0]) {
1273       for (i=0; i<ngrp; i++) {
1274         m=0.; s=0.; v=0.; ans_na = FALSE;
1275         if (grpsize[i] != 1) {
1276           thisgrpsize = grpsize[i];
1277           SETLENGTH(sub, thisgrpsize); // to gather this group's data
1278           for (j=0; j<thisgrpsize; j++) {
1279             ix = ff[i]+j-1;
1280             if (isunsorted) ix = oo[ix]-1;
1281             ix = (irowslen == -1) ? ix : irows[ix]-1;
1282             if (INTEGER(x)[ix] == NA_INTEGER) { ans_na = TRUE; break; }
1283             INTEGER(sub)[j] = INTEGER(x)[ix];
1284             m += INTEGER(sub)[j]; // sum
1285           }
1286           if (ans_na) { REAL(ans)[i] = NA_REAL; continue; }
1287           m = m/thisgrpsize; // mean, first pass
1288           for (j=0; j<thisgrpsize; j++) s += (INTEGER(sub)[j]-m); // residuals
1289           m += (s/thisgrpsize); // mean, second pass
1290           for (j=0; j<thisgrpsize; j++) { // variance
1291             v += (INTEGER(sub)[j]-(double)m) * (INTEGER(sub)[j]-(double)m);
1292           }
1293           REAL(ans)[i] = (double)v/(thisgrpsize-1);
1294           if (isSD) REAL(ans)[i] = SQRTL(REAL(ans)[i]);
1295         } else REAL(ans)[i] = NA_REAL;
1296       }
1297     } else {
1298       for (i=0; i<ngrp; i++) {
1299         m=0.; s=0.; v=0.; thisgrpsize = 0;
1300         if (grpsize[i] != 1) {
1301           SETLENGTH(sub, grpsize[i]); // to gather this group's data
1302           for (j=0; j<grpsize[i]; j++) {
1303             ix = ff[i]+j-1;
1304             if (isunsorted) ix = oo[ix]-1;
1305             ix = (irowslen == -1) ? ix : irows[ix]-1;
1306             if (INTEGER(x)[ix] == NA_INTEGER) continue;
1307             INTEGER(sub)[thisgrpsize] = INTEGER(x)[ix];
1308             m += INTEGER(sub)[thisgrpsize]; // sum
1309             thisgrpsize++;
1310           }
1311           if (thisgrpsize <= 1) { REAL(ans)[i] = NA_REAL; continue; }
1312           m = m/thisgrpsize; // mean, first pass
1313           for (j=0; j<thisgrpsize; j++) s += (INTEGER(sub)[j]-m); // residuals
1314           m += (s/thisgrpsize); // mean, second pass
1315           for (j=0; j<thisgrpsize; j++) { // variance
1316             v += (INTEGER(sub)[j]-(double)m) * (INTEGER(sub)[j]-(double)m);
1317           }
1318           REAL(ans)[i] = (double)v/(thisgrpsize-1);
1319           if (isSD) REAL(ans)[i] = SQRTL(REAL(ans)[i]);
1320         } else REAL(ans)[i] = NA_REAL;
1321       }
1322     }
1323     SETLENGTH(sub, maxgrpn);
1324     break;
1325   case REALSXP:
1326     sub = PROTECT(allocVector(REALSXP, maxgrpn)); // allocate once upfront
1327     if (!LOGICAL(narm)[0]) {
1328       for (i=0; i<ngrp; i++) {
1329         m=0.; s=0.; v=0.; ans_na = FALSE;
1330         if (grpsize[i] != 1) {
1331           thisgrpsize = grpsize[i];
1332           SETLENGTH(sub, thisgrpsize); // to gather this group's data
1333           for (j=0; j<thisgrpsize; j++) {
1334             ix = ff[i]+j-1;
1335             if (isunsorted) ix = oo[ix]-1;
1336             ix = (irowslen == -1) ? ix : irows[ix]-1;
1337             if (ISNAN(REAL(x)[ix])) { ans_na = TRUE; break; }
1338             REAL(sub)[j] = REAL(x)[ix];
1339             m += REAL(sub)[j]; // sum
1340           }
1341           if (ans_na) { REAL(ans)[i] = NA_REAL; continue; }
1342           m = m/thisgrpsize; // mean, first pass
1343           for (j=0; j<thisgrpsize; j++) s += (REAL(sub)[j]-m); // residuals
1344           m += (s/thisgrpsize); // mean, second pass
1345           for (j=0; j<thisgrpsize; j++) { // variance
1346             v += (REAL(sub)[j]-(double)m) * (REAL(sub)[j]-(double)m);
1347           }
1348           REAL(ans)[i] = (double)v/(thisgrpsize-1);
1349           if (isSD) REAL(ans)[i] = SQRTL(REAL(ans)[i]);
1350         } else REAL(ans)[i] = NA_REAL;
1351       }
1352     } else {
1353       for (i=0; i<ngrp; i++) {
1354         m=0.; s=0.; v=0.; thisgrpsize = 0;
1355         if (grpsize[i] != 1) {
1356           SETLENGTH(sub, grpsize[i]); // to gather this group's data
1357           for (j=0; j<grpsize[i]; j++) {
1358             ix = ff[i]+j-1;
1359             if (isunsorted) ix = oo[ix]-1;
1360             ix = (irowslen == -1) ? ix : irows[ix]-1;
1361             if (ISNAN(REAL(x)[ix])) continue;
1362             REAL(sub)[thisgrpsize] = REAL(x)[ix];
1363             m += REAL(sub)[thisgrpsize]; // sum
1364             thisgrpsize++;
1365           }
1366           if (thisgrpsize <= 1) { REAL(ans)[i] = NA_REAL; continue; }
1367           m = m/thisgrpsize; // mean, first pass
1368           for (j=0; j<thisgrpsize; j++) s += (REAL(sub)[j]-m); // residuals
1369           m += (s/thisgrpsize); // mean, second pass
1370           for (j=0; j<thisgrpsize; j++) { // variance
1371             v += (REAL(sub)[j]-(double)m) * (REAL(sub)[j]-(double)m);
1372           }
1373           REAL(ans)[i] = (double)v/(thisgrpsize-1);
1374           if (isSD) REAL(ans)[i] = SQRTL(REAL(ans)[i]);
1375         } else REAL(ans)[i] = NA_REAL;
1376       }
1377     }
1378     SETLENGTH(sub, maxgrpn);
1379     break;
1380   default:
1381       if (!isSD) {
1382         error(_("Type '%s' not supported by GForce var (gvar). Either add the prefix stats::var(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
1383       } else {
1384         error(_("Type '%s' not supported by GForce sd (gsd). Either add the prefix stats::sd(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
1385       }
1386   }
1387   // no copyMostAttrib(x, ans) since class (e.g. Date) unlikely applicable to sd/var
1388   UNPROTECT(2);
1389   return (ans);
1390 }
1391 
gvar(SEXP x,SEXP narm)1392 SEXP gvar(SEXP x, SEXP narm) {
1393   return (gvarsd1(x, narm, FALSE));
1394 }
1395 
gsd(SEXP x,SEXP narm)1396 SEXP gsd(SEXP x, SEXP narm) {
1397   return (gvarsd1(x, narm, TRUE));
1398 }
1399 
gprod(SEXP x,SEXP narm)1400 SEXP gprod(SEXP x, SEXP narm)
1401 {
1402   if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
1403   if (!isVectorAtomic(x)) error(_("GForce prod can only be applied to columns, not .SD or similar. To multiply all items in a list such as .SD, either add the prefix base::prod(.SD) or turn off GForce optimization using options(datatable.optimize=1). More likely, you may be looking for 'DT[,lapply(.SD,prod),by=,.SDcols=]'"));
1404   if (inherits(x, "factor")) error(_("prod is not meaningful for factors."));
1405   int i, ix, thisgrp;
1406   int n = (irowslen == -1) ? length(x) : irowslen;
1407   //clock_t start = clock();
1408   SEXP ans;
1409   if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gprod");
1410   long double *s = malloc(ngrp * sizeof(long double));
1411   if (!s) error(_("Unable to allocate %d * %d bytes for gprod"), ngrp, sizeof(long double));
1412   for (i=0; i<ngrp; i++) s[i] = 1.0;
1413   ans = PROTECT(allocVector(REALSXP, ngrp));
1414   switch(TYPEOF(x)) {
1415   case LGLSXP: case INTSXP:
1416     for (i=0; i<n; i++) {
1417       thisgrp = grp[i];
1418       ix = (irowslen == -1) ? i : irows[i]-1;
1419       if(INTEGER(x)[ix] == NA_INTEGER) {
1420         if (!LOGICAL(narm)[0]) s[thisgrp] = NA_REAL;  // Let NA_REAL propogate from here. R_NaReal is IEEE.
1421         continue;
1422       }
1423       s[thisgrp] *= INTEGER(x)[ix];  // no under/overflow here, s is long double (like base)
1424     }
1425     for (i=0; i<ngrp; i++) {
1426       if (s[i] > DBL_MAX) REAL(ans)[i] = R_PosInf;
1427       else if (s[i] < -DBL_MAX) REAL(ans)[i] = R_NegInf;
1428       else REAL(ans)[i] = (double)s[i];
1429     }
1430     break;
1431   case REALSXP:
1432     for (i=0; i<n; i++) {
1433       thisgrp = grp[i];
1434       ix = (irowslen == -1) ? i : irows[i]-1;
1435       if(ISNAN(REAL(x)[ix]) && LOGICAL(narm)[0]) continue;  // else let NA_REAL propogate from here
1436       s[thisgrp] *= REAL(x)[ix];  // done in long double, like base
1437     }
1438     for (i=0; i<ngrp; i++) {
1439       if (s[i] > DBL_MAX) REAL(ans)[i] = R_PosInf;
1440       else if (s[i] < -DBL_MAX) REAL(ans)[i] = R_NegInf;
1441       else REAL(ans)[i] = (double)s[i];
1442     }
1443     break;
1444   default:
1445     free(s);
1446     error(_("Type '%s' not supported by GForce prod (gprod). Either add the prefix base::prod(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
1447   }
1448   free(s);
1449   copyMostAttrib(x, ans);
1450   UNPROTECT(1);
1451   // Rprintf(_("this gprod took %8.3f\n"), 1.0*(clock()-start)/CLOCKS_PER_SEC);
1452   return(ans);
1453 }
1454