1 #include "data.table.h"
2 
nafillDouble(double * x,uint_fast64_t nx,unsigned int type,double fill,bool nan_is_na,ans_t * ans,bool verbose)3 void nafillDouble(double *x, uint_fast64_t nx, unsigned int type, double fill, bool nan_is_na, ans_t *ans, bool verbose) {
4   double tic=0.0;
5   if (verbose)
6     tic = omp_get_wtime();
7   if (type==0) { // const
8     if (nan_is_na) {
9       for (uint_fast64_t i=0; i<nx; i++) {
10         ans->dbl_v[i] = ISNAN(x[i]) ? fill : x[i];
11       }
12     } else {
13       for (uint_fast64_t i=0; i<nx; i++) {
14         ans->dbl_v[i] = ISNA(x[i]) ? fill : x[i];
15       }
16     }
17   } else if (type==1) { // locf
18     ans->dbl_v[0] = x[0];
19     if (nan_is_na) {
20       for (uint_fast64_t i=1; i<nx; i++) {
21         ans->dbl_v[i] = ISNAN(x[i]) ? ans->dbl_v[i-1] : x[i];
22       }
23     } else {
24       for (uint_fast64_t i=1; i<nx; i++) {
25         ans->dbl_v[i] = ISNA(x[i]) ? ans->dbl_v[i-1] : x[i];
26       }
27     }
28   } else if (type==2) { // nocb
29     ans->dbl_v[nx-1] = x[nx-1];
30     if (nan_is_na) {
31       for (int_fast64_t i=nx-2; i>=0; i--) {
32         ans->dbl_v[i] = ISNAN(x[i]) ? ans->dbl_v[i+1] : x[i];
33       }
34     } else {
35       for (int_fast64_t i=nx-2; i>=0; i--) {
36         ans->dbl_v[i] = ISNA(x[i]) ? ans->dbl_v[i+1] : x[i];
37       }
38     }
39   }
40   if (verbose)
41     snprintf(ans->message[0], 500, "%s: took %.3fs\n", __func__, omp_get_wtime()-tic);
42 }
nafillInteger(int32_t * x,uint_fast64_t nx,unsigned int type,int32_t fill,ans_t * ans,bool verbose)43 void nafillInteger(int32_t *x, uint_fast64_t nx, unsigned int type, int32_t fill, ans_t *ans, bool verbose) {
44   double tic=0.0;
45   if (verbose)
46     tic = omp_get_wtime();
47   if (type==0) { // const
48     for (uint_fast64_t i=0; i<nx; i++) {
49       ans->int_v[i] = x[i]==NA_INTEGER ? fill : x[i];
50     }
51   } else if (type==1) { // locf
52     ans->int_v[0] = x[0];
53     for (uint_fast64_t i=1; i<nx; i++) {
54       ans->int_v[i] = x[i]==NA_INTEGER ? ans->int_v[i-1] : x[i];
55     }
56   } else if (type==2) { // nocb
57     ans->int_v[nx-1] = x[nx-1];
58     for (int_fast64_t i=nx-2; i>=0; i--) {
59       ans->int_v[i] = x[i]==NA_INTEGER ? ans->int_v[i+1] : x[i];
60     }
61   }
62   if (verbose)
63     snprintf(ans->message[0], 500, "%s: took %.3fs\n", __func__, omp_get_wtime()-tic);
64 }
nafillInteger64(int64_t * x,uint_fast64_t nx,unsigned int type,int64_t fill,ans_t * ans,bool verbose)65 void nafillInteger64(int64_t *x, uint_fast64_t nx, unsigned int type, int64_t fill, ans_t *ans, bool verbose) {
66   double tic=0.0;
67   if (verbose)
68     tic = omp_get_wtime();
69   if (type==0) { // const
70     for (uint_fast64_t i=0; i<nx; i++) {
71       ans->int64_v[i] = x[i]==NA_INTEGER64 ? fill : x[i];
72     }
73   } else if (type==1) { // locf
74     ans->int64_v[0] = x[0];
75     for (uint_fast64_t i=1; i<nx; i++) {
76       ans->int64_v[i] = x[i]==NA_INTEGER64 ? ans->int64_v[i-1] : x[i];
77     }
78   } else if (type==2) { // nocb
79     ans->int64_v[nx-1] = x[nx-1];
80     for (int_fast64_t i=nx-2; i>=0; i--) {
81       ans->int64_v[i] = x[i]==NA_INTEGER64 ? ans->int64_v[i+1] : x[i];
82     }
83   }
84   if (verbose)
85     snprintf(ans->message[0], 500, "%s: took %.3fs\n", __func__, omp_get_wtime()-tic);
86 }
87 
nafillR(SEXP obj,SEXP type,SEXP fill,SEXP nan_is_na_arg,SEXP inplace,SEXP cols)88 SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, SEXP cols) {
89   int protecti=0;
90   const bool verbose = GetVerbose();
91 
92   if (!xlength(obj))
93     return(obj);
94 
95   bool binplace = LOGICAL(inplace)[0];
96   SEXP x = R_NilValue;
97   if (isVectorAtomic(obj)) {
98     if (binplace)
99       error(_("'x' argument is atomic vector, in-place update is supported only for list/data.table"));
100     else if (!isReal(obj) && !isInteger(obj))
101       error(_("'x' argument must be numeric type, or list/data.table of numeric types"));
102     x = PROTECT(allocVector(VECSXP, 1)); protecti++; // wrap into list
103     SET_VECTOR_ELT(x, 0, obj);
104   } else {
105     SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE))); protecti++; // nafill cols=NULL which turns into seq_along(obj)
106     x = PROTECT(allocVector(VECSXP, length(ricols))); protecti++;
107     int *icols = INTEGER(ricols);
108     for (int i=0; i<length(ricols); i++) {
109       SEXP this_col = VECTOR_ELT(obj, icols[i]-1);
110       if (!isReal(this_col) && !isInteger(this_col))
111         error(_("'x' argument must be numeric type, or list/data.table of numeric types"));
112       SET_VECTOR_ELT(x, i, this_col);
113     }
114   }
115   R_len_t nx = length(x);
116 
117   double **dx = (double**)R_alloc(nx, sizeof(double*));
118   int32_t **ix = (int32_t**)R_alloc(nx, sizeof(int32_t*));
119   int64_t **i64x = (int64_t**)R_alloc(nx, sizeof(int64_t*));
120   uint_fast64_t *inx = (uint_fast64_t*)R_alloc(nx, sizeof(uint_fast64_t));
121   SEXP ans = R_NilValue;
122   ans_t *vans = (ans_t *)R_alloc(nx, sizeof(ans_t));
123   for (R_len_t i=0; i<nx; i++) {
124     const SEXP xi = VECTOR_ELT(x, i);
125     inx[i] = xlength(xi);
126     // not sure why these pointers are being constructed like this; TODO: simplify structure
127     if (isReal(xi)) {
128       dx[i] = REAL(xi);
129       i64x[i] = (int64_t *)REAL(xi);
130       ix[i] = NULL;
131     } else {
132       ix[i] = INTEGER(xi);
133       dx[i] = NULL;
134       i64x[i] = NULL;
135     }
136   }
137   if (!binplace) {
138     ans = PROTECT(allocVector(VECSXP, nx)); protecti++;
139     for (R_len_t i=0; i<nx; i++) {
140       SET_VECTOR_ELT(ans, i, allocVector(TYPEOF(VECTOR_ELT(x, i)), inx[i]));
141       const SEXP ansi = VECTOR_ELT(ans, i);
142       const void *p = isReal(ansi) ? (void *)REAL(ansi) : (void *)INTEGER(ansi);
143       vans[i] = ((ans_t) { .dbl_v=(double *)p, .int_v=(int *)p, .int64_v=(int64_t *)p, .status=0, .message={"\0","\0","\0","\0"} });
144     }
145   } else {
146     for (R_len_t i=0; i<nx; i++) {
147       vans[i] = ((ans_t) { .dbl_v=dx[i], .int_v=ix[i], .int64_v=i64x[i], .status=0, .message={"\0","\0","\0","\0"} });
148     }
149   }
150 
151   unsigned int itype;
152   if (!strcmp(CHAR(STRING_ELT(type, 0)), "const"))
153     itype = 0;
154   else if (!strcmp(CHAR(STRING_ELT(type, 0)), "locf"))
155     itype = 1;
156   else if (!strcmp(CHAR(STRING_ELT(type, 0)), "nocb"))
157     itype = 2;
158   else
159     error(_("Internal error: invalid type argument in nafillR function, should have been caught before. Please report to data.table issue tracker.")); // # nocov
160 
161   if (itype==0 && length(fill)!=1)
162     error(_("fill must be a vector of length 1"));
163 
164   double dfill=NA_REAL;
165   int32_t ifill=NA_INTEGER;
166   int64_t i64fill=NA_INTEGER64;
167   if (itype==0)
168     coerceFill(fill, &dfill, &ifill, &i64fill);
169 
170   double tic=0.0, toc=0.0;
171   if (verbose)
172     tic = omp_get_wtime();
173   #pragma omp parallel for schedule(dynamic) num_threads(getDTthreads(nx, false))
174   for (R_len_t i=0; i<nx; i++) {
175     SEXP this_x = VECTOR_ELT(x, i);
176     switch (TYPEOF(this_x)) {
177     case REALSXP : {
178       if (INHERITS(this_x, char_integer64) || INHERITS(this_x, char_nanotime)) {  // inside parallel region so can't call Rinherits()
179         nafillInteger64(i64x[i], inx[i], itype, i64fill, &vans[i], verbose);
180       } else {
181         if (!IS_TRUE_OR_FALSE(nan_is_na_arg))
182           error(_("nan_is_na must be TRUE or FALSE")); // # nocov
183         bool nan_is_na = LOGICAL(nan_is_na_arg)[0];
184         nafillDouble(dx[i], inx[i], itype, dfill, nan_is_na, &vans[i], verbose);
185       }
186     } break;
187     case INTSXP : {
188       nafillInteger(ix[i], inx[i], itype, ifill, &vans[i], verbose);
189     } break;
190     default: error(_("Internal error: invalid type argument in nafillR function, should have been caught before. Please report to data.table issue tracker.")); // # nocov
191     }
192   }
193   if (verbose)
194     toc = omp_get_wtime();
195 
196   if (!binplace) {
197     for (R_len_t i=0; i<nx; i++) {
198       if (!isNull(ATTRIB(VECTOR_ELT(x, i))))
199         copyMostAttrib(VECTOR_ELT(x, i), VECTOR_ELT(ans, i));
200     }
201   }
202 
203   ansMsg(vans, nx, verbose, __func__);
204 
205   if (verbose)
206     Rprintf(_("%s: parallel processing of %d column(s) took %.3fs\n"), __func__, nx, toc-tic);
207 
208   UNPROTECT(protecti);
209   if (binplace) {
210     return obj;
211   } else {
212     return isVectorAtomic(obj) && length(ans) == 1 ? VECTOR_ELT(ans, 0) : ans;
213   }
214 }
215