1 #include "data.table.h"
2
nafillDouble(double * x,uint_fast64_t nx,unsigned int type,double fill,bool nan_is_na,ans_t * ans,bool verbose)3 void nafillDouble(double *x, uint_fast64_t nx, unsigned int type, double fill, bool nan_is_na, ans_t *ans, bool verbose) {
4 double tic=0.0;
5 if (verbose)
6 tic = omp_get_wtime();
7 if (type==0) { // const
8 if (nan_is_na) {
9 for (uint_fast64_t i=0; i<nx; i++) {
10 ans->dbl_v[i] = ISNAN(x[i]) ? fill : x[i];
11 }
12 } else {
13 for (uint_fast64_t i=0; i<nx; i++) {
14 ans->dbl_v[i] = ISNA(x[i]) ? fill : x[i];
15 }
16 }
17 } else if (type==1) { // locf
18 ans->dbl_v[0] = x[0];
19 if (nan_is_na) {
20 for (uint_fast64_t i=1; i<nx; i++) {
21 ans->dbl_v[i] = ISNAN(x[i]) ? ans->dbl_v[i-1] : x[i];
22 }
23 } else {
24 for (uint_fast64_t i=1; i<nx; i++) {
25 ans->dbl_v[i] = ISNA(x[i]) ? ans->dbl_v[i-1] : x[i];
26 }
27 }
28 } else if (type==2) { // nocb
29 ans->dbl_v[nx-1] = x[nx-1];
30 if (nan_is_na) {
31 for (int_fast64_t i=nx-2; i>=0; i--) {
32 ans->dbl_v[i] = ISNAN(x[i]) ? ans->dbl_v[i+1] : x[i];
33 }
34 } else {
35 for (int_fast64_t i=nx-2; i>=0; i--) {
36 ans->dbl_v[i] = ISNA(x[i]) ? ans->dbl_v[i+1] : x[i];
37 }
38 }
39 }
40 if (verbose)
41 snprintf(ans->message[0], 500, "%s: took %.3fs\n", __func__, omp_get_wtime()-tic);
42 }
nafillInteger(int32_t * x,uint_fast64_t nx,unsigned int type,int32_t fill,ans_t * ans,bool verbose)43 void nafillInteger(int32_t *x, uint_fast64_t nx, unsigned int type, int32_t fill, ans_t *ans, bool verbose) {
44 double tic=0.0;
45 if (verbose)
46 tic = omp_get_wtime();
47 if (type==0) { // const
48 for (uint_fast64_t i=0; i<nx; i++) {
49 ans->int_v[i] = x[i]==NA_INTEGER ? fill : x[i];
50 }
51 } else if (type==1) { // locf
52 ans->int_v[0] = x[0];
53 for (uint_fast64_t i=1; i<nx; i++) {
54 ans->int_v[i] = x[i]==NA_INTEGER ? ans->int_v[i-1] : x[i];
55 }
56 } else if (type==2) { // nocb
57 ans->int_v[nx-1] = x[nx-1];
58 for (int_fast64_t i=nx-2; i>=0; i--) {
59 ans->int_v[i] = x[i]==NA_INTEGER ? ans->int_v[i+1] : x[i];
60 }
61 }
62 if (verbose)
63 snprintf(ans->message[0], 500, "%s: took %.3fs\n", __func__, omp_get_wtime()-tic);
64 }
nafillInteger64(int64_t * x,uint_fast64_t nx,unsigned int type,int64_t fill,ans_t * ans,bool verbose)65 void nafillInteger64(int64_t *x, uint_fast64_t nx, unsigned int type, int64_t fill, ans_t *ans, bool verbose) {
66 double tic=0.0;
67 if (verbose)
68 tic = omp_get_wtime();
69 if (type==0) { // const
70 for (uint_fast64_t i=0; i<nx; i++) {
71 ans->int64_v[i] = x[i]==NA_INTEGER64 ? fill : x[i];
72 }
73 } else if (type==1) { // locf
74 ans->int64_v[0] = x[0];
75 for (uint_fast64_t i=1; i<nx; i++) {
76 ans->int64_v[i] = x[i]==NA_INTEGER64 ? ans->int64_v[i-1] : x[i];
77 }
78 } else if (type==2) { // nocb
79 ans->int64_v[nx-1] = x[nx-1];
80 for (int_fast64_t i=nx-2; i>=0; i--) {
81 ans->int64_v[i] = x[i]==NA_INTEGER64 ? ans->int64_v[i+1] : x[i];
82 }
83 }
84 if (verbose)
85 snprintf(ans->message[0], 500, "%s: took %.3fs\n", __func__, omp_get_wtime()-tic);
86 }
87
nafillR(SEXP obj,SEXP type,SEXP fill,SEXP nan_is_na_arg,SEXP inplace,SEXP cols)88 SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, SEXP cols) {
89 int protecti=0;
90 const bool verbose = GetVerbose();
91
92 if (!xlength(obj))
93 return(obj);
94
95 bool binplace = LOGICAL(inplace)[0];
96 SEXP x = R_NilValue;
97 if (isVectorAtomic(obj)) {
98 if (binplace)
99 error(_("'x' argument is atomic vector, in-place update is supported only for list/data.table"));
100 else if (!isReal(obj) && !isInteger(obj))
101 error(_("'x' argument must be numeric type, or list/data.table of numeric types"));
102 x = PROTECT(allocVector(VECSXP, 1)); protecti++; // wrap into list
103 SET_VECTOR_ELT(x, 0, obj);
104 } else {
105 SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE))); protecti++; // nafill cols=NULL which turns into seq_along(obj)
106 x = PROTECT(allocVector(VECSXP, length(ricols))); protecti++;
107 int *icols = INTEGER(ricols);
108 for (int i=0; i<length(ricols); i++) {
109 SEXP this_col = VECTOR_ELT(obj, icols[i]-1);
110 if (!isReal(this_col) && !isInteger(this_col))
111 error(_("'x' argument must be numeric type, or list/data.table of numeric types"));
112 SET_VECTOR_ELT(x, i, this_col);
113 }
114 }
115 R_len_t nx = length(x);
116
117 double **dx = (double**)R_alloc(nx, sizeof(double*));
118 int32_t **ix = (int32_t**)R_alloc(nx, sizeof(int32_t*));
119 int64_t **i64x = (int64_t**)R_alloc(nx, sizeof(int64_t*));
120 uint_fast64_t *inx = (uint_fast64_t*)R_alloc(nx, sizeof(uint_fast64_t));
121 SEXP ans = R_NilValue;
122 ans_t *vans = (ans_t *)R_alloc(nx, sizeof(ans_t));
123 for (R_len_t i=0; i<nx; i++) {
124 const SEXP xi = VECTOR_ELT(x, i);
125 inx[i] = xlength(xi);
126 // not sure why these pointers are being constructed like this; TODO: simplify structure
127 if (isReal(xi)) {
128 dx[i] = REAL(xi);
129 i64x[i] = (int64_t *)REAL(xi);
130 ix[i] = NULL;
131 } else {
132 ix[i] = INTEGER(xi);
133 dx[i] = NULL;
134 i64x[i] = NULL;
135 }
136 }
137 if (!binplace) {
138 ans = PROTECT(allocVector(VECSXP, nx)); protecti++;
139 for (R_len_t i=0; i<nx; i++) {
140 SET_VECTOR_ELT(ans, i, allocVector(TYPEOF(VECTOR_ELT(x, i)), inx[i]));
141 const SEXP ansi = VECTOR_ELT(ans, i);
142 const void *p = isReal(ansi) ? (void *)REAL(ansi) : (void *)INTEGER(ansi);
143 vans[i] = ((ans_t) { .dbl_v=(double *)p, .int_v=(int *)p, .int64_v=(int64_t *)p, .status=0, .message={"\0","\0","\0","\0"} });
144 }
145 } else {
146 for (R_len_t i=0; i<nx; i++) {
147 vans[i] = ((ans_t) { .dbl_v=dx[i], .int_v=ix[i], .int64_v=i64x[i], .status=0, .message={"\0","\0","\0","\0"} });
148 }
149 }
150
151 unsigned int itype;
152 if (!strcmp(CHAR(STRING_ELT(type, 0)), "const"))
153 itype = 0;
154 else if (!strcmp(CHAR(STRING_ELT(type, 0)), "locf"))
155 itype = 1;
156 else if (!strcmp(CHAR(STRING_ELT(type, 0)), "nocb"))
157 itype = 2;
158 else
159 error(_("Internal error: invalid type argument in nafillR function, should have been caught before. Please report to data.table issue tracker.")); // # nocov
160
161 if (itype==0 && length(fill)!=1)
162 error(_("fill must be a vector of length 1"));
163
164 double dfill=NA_REAL;
165 int32_t ifill=NA_INTEGER;
166 int64_t i64fill=NA_INTEGER64;
167 if (itype==0)
168 coerceFill(fill, &dfill, &ifill, &i64fill);
169
170 double tic=0.0, toc=0.0;
171 if (verbose)
172 tic = omp_get_wtime();
173 #pragma omp parallel for schedule(dynamic) num_threads(getDTthreads(nx, false))
174 for (R_len_t i=0; i<nx; i++) {
175 SEXP this_x = VECTOR_ELT(x, i);
176 switch (TYPEOF(this_x)) {
177 case REALSXP : {
178 if (INHERITS(this_x, char_integer64) || INHERITS(this_x, char_nanotime)) { // inside parallel region so can't call Rinherits()
179 nafillInteger64(i64x[i], inx[i], itype, i64fill, &vans[i], verbose);
180 } else {
181 if (!IS_TRUE_OR_FALSE(nan_is_na_arg))
182 error(_("nan_is_na must be TRUE or FALSE")); // # nocov
183 bool nan_is_na = LOGICAL(nan_is_na_arg)[0];
184 nafillDouble(dx[i], inx[i], itype, dfill, nan_is_na, &vans[i], verbose);
185 }
186 } break;
187 case INTSXP : {
188 nafillInteger(ix[i], inx[i], itype, ifill, &vans[i], verbose);
189 } break;
190 default: error(_("Internal error: invalid type argument in nafillR function, should have been caught before. Please report to data.table issue tracker.")); // # nocov
191 }
192 }
193 if (verbose)
194 toc = omp_get_wtime();
195
196 if (!binplace) {
197 for (R_len_t i=0; i<nx; i++) {
198 if (!isNull(ATTRIB(VECTOR_ELT(x, i))))
199 copyMostAttrib(VECTOR_ELT(x, i), VECTOR_ELT(ans, i));
200 }
201 }
202
203 ansMsg(vans, nx, verbose, __func__);
204
205 if (verbose)
206 Rprintf(_("%s: parallel processing of %d column(s) took %.3fs\n"), __func__, nx, toc-tic);
207
208 UNPROTECT(protecti);
209 if (binplace) {
210 return obj;
211 } else {
212 return isVectorAtomic(obj) && length(ans) == 1 ? VECTOR_ELT(ans, 0) : ans;
213 }
214 }
215