1 #include "myomp.h" // first for clang-13-omp, #5122 2 #include "dt_stdio.h" // PRId64 and PRIu64 3 #include <R.h> 4 #include <Rversion.h> 5 #if !defined(R_VERSION) || R_VERSION < R_Version(3, 5, 0) // R-exts$6.14 6 # define ALTREP(x) 0 // #2866 7 # define USE_RINTERNALS // #3301 8 # define DATAPTR_RO(x) ((const void *)DATAPTR(x)) 9 #endif 10 #include <Rinternals.h> 11 #define SEXPPTR_RO(x) ((const SEXP *)DATAPTR_RO(x)) // to avoid overhead of looped STRING_ELT and VECTOR_ELT 12 #include <stdint.h> // for uint64_t rather than unsigned long long 13 #include <stdbool.h> 14 #include "types.h" 15 #include "po.h" 16 #ifdef WIN32 // positional specifiers (%n$) used in translations; #4402 17 # define snprintf dt_win_snprintf // see our snprintf.c; tried and failed to link to _sprintf_p on Windows 18 #endif 19 #ifdef sprintf 20 #undef sprintf 21 #endif 22 #define sprintf USE_SNPRINTF_NOT_SPRINTF // prevent use of sprintf in data.table source; force us to use n always 23 24 // #include <signal.h> // the debugging machinery + breakpoint aidee 25 // raise(SIGINT); 26 27 #define IS_UTF8(x) (LEVELS(x) & 8) 28 #define IS_ASCII(x) (LEVELS(x) & 64) 29 #define IS_LATIN(x) (LEVELS(x) & 4) 30 #define IS_TRUE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]==TRUE) 31 #define IS_FALSE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]==FALSE) 32 #define IS_TRUE_OR_FALSE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]!=NA_LOGICAL) 33 34 #define SIZEOF(x) __sizes[TYPEOF(x)] 35 #define TYPEORDER(x) __typeorder[x] 36 37 #ifdef MIN 38 # undef MIN 39 #endif 40 #define MIN(a,b) (((a)<(b))?(a):(b)) 41 42 #ifdef MAX 43 # undef MAX 44 #endif 45 #define MAX(a,b) (((a)>(b))?(a):(b)) 46 47 // for use with bit64::integer64 48 #define NA_INTEGER64 INT64_MIN 49 #define MAX_INTEGER64 INT64_MAX 50 51 // for use with CPLXSXP, no macro provided by R internals 52 #define ISNAN_COMPLEX(x) (ISNAN((x).r) || ISNAN((x).i)) // TRUE if either real or imaginary component is NA or NaN 53 54 // Backport macros added to R in 2017 so we don't need to update dependency from R 3.0.0 55 #ifndef MAYBE_SHARED 56 # define MAYBE_SHARED(x) (NAMED(x) > 1) 57 #endif 58 #ifndef MAYBE_REFERENCED 59 # define MAYBE_REFERENCED(x) ( NAMED(x) > 0 ) 60 #endif 61 62 // If we find a non-ASCII, non-NA, non-UTF8 encoding, we try to convert it to UTF8. That is, marked non-ascii/non-UTF8 encodings will 63 // always be checked in UTF8 locale. This seems to be the best fix Arun could think of to put the encoding issues to rest. 64 // Since the if-statement will fail with the first condition check in "normal" ASCII cases, there shouldn't be huge penalty issues in 65 // most cases. Fix for #66, #69, #469 and #1293 66 // TODO: compare 1.9.6 performance with 1.9.7 with huge number of ASCII strings, and again after Jan 2018 when made macro. 67 // Matt moved this to be macro in Jan 2018 so that branch can benefit from branch prediction too wherever used inside loops. 68 // This IS_ASCII will dereference s and that cache fetch is the part that may bite more than the branch, though. Without a call to 69 // to ENC2UTF as all, the pointer value can just be compared by the calling code without deferencing it. It may still be worth 70 // timing the impact and manually avoiding (is there an IS_ASCII on the character vector rather than testing each item every time?) 71 #define NEED2UTF8(s) !(IS_ASCII(s) || (s)==NA_STRING || IS_UTF8(s)) 72 #define ENC2UTF8(s) (!NEED2UTF8(s) ? (s) : mkCharCE(translateCharUTF8(s), CE_UTF8)) 73 74 // init.c 75 extern SEXP char_integer64; 76 extern SEXP char_ITime; 77 extern SEXP char_IDate; 78 extern SEXP char_Date; 79 extern SEXP char_POSIXct; 80 extern SEXP char_POSIXt; 81 extern SEXP char_UTC; 82 extern SEXP char_nanotime; 83 extern SEXP char_lens; 84 extern SEXP char_indices; 85 extern SEXP char_allLen1; 86 extern SEXP char_allGrp1; 87 extern SEXP char_factor; 88 extern SEXP char_ordered; 89 extern SEXP char_datatable; 90 extern SEXP char_dataframe; 91 extern SEXP char_NULL; 92 extern SEXP sym_sorted; 93 extern SEXP sym_index; 94 extern SEXP sym_BY; 95 extern SEXP sym_starts, char_starts; 96 extern SEXP sym_maxgrpn; 97 extern SEXP sym_colClassesAs; 98 extern SEXP sym_verbose; 99 extern SEXP SelfRefSymbol; 100 extern SEXP sym_inherits; 101 extern SEXP sym_datatable_locked; 102 extern SEXP sym_tzone; 103 extern SEXP sym_old_fread_datetime_character; 104 extern double NA_INT64_D; 105 extern long long NA_INT64_LL; 106 extern Rcomplex NA_CPLX; // initialized in init.c; see there for comments 107 extern size_t __sizes[100]; // max appears to be FUNSXP = 99, see Rinternals.h 108 extern size_t __typeorder[100]; // __ prefix otherwise if we use these names directly, the SIZEOF define ends up using the local one 109 110 long long DtoLL(double x); 111 double LLtoD(long long x); 112 bool GetVerbose(); 113 114 // cj.c 115 SEXP cj(SEXP base_list); 116 117 // dogroups.c 118 SEXP keepattr(SEXP to, SEXP from); 119 SEXP growVector(SEXP x, R_len_t newlen); 120 121 // assign.c 122 SEXP allocNAVector(SEXPTYPE type, R_len_t n); 123 SEXP allocNAVectorLike(SEXP x, R_len_t n); 124 void writeNA(SEXP v, const int from, const int n); 125 void savetl_init(), savetl(SEXP s), savetl_end(); 126 int checkOverAlloc(SEXP x); 127 128 // forder.c 129 int StrCmp(SEXP x, SEXP y); 130 uint64_t dtwiddle(double x); 131 SEXP forder(SEXP DT, SEXP by, SEXP retGrp, SEXP sortStrArg, SEXP orderArg, SEXP naArg); 132 int getNumericRounding_C(); 133 134 // reorder.c 135 SEXP reorder(SEXP x, SEXP order); 136 SEXP setcolorder(SEXP x, SEXP o); 137 138 // subset.c 139 void subsetVectorRaw(SEXP ans, SEXP source, SEXP idx, const bool anyNA); 140 SEXP subsetVector(SEXP x, SEXP idx); 141 142 // fcast.c 143 SEXP int_vec_init(R_len_t n, int val); 144 145 // vecseq.c 146 SEXP vecseq(SEXP x, SEXP len, SEXP clamp); 147 148 // uniqlist.c 149 SEXP uniqlist(SEXP l, SEXP order); 150 SEXP uniqlengths(SEXP x, SEXP n); 151 152 // chmatch.c 153 SEXP chmatch(SEXP x, SEXP table, int nomatch); 154 SEXP chin(SEXP x, SEXP table); 155 156 SEXP isOrderedSubset(SEXP, SEXP); 157 void setselfref(SEXP); 158 159 // fmelt.c 160 SEXP seq_int(int n, int start); 161 SEXP set_diff(SEXP x, int n); 162 SEXP which(SEXP x, Rboolean val); 163 164 // frank.c 165 SEXP dt_na(SEXP x, SEXP cols); 166 167 // assign.c 168 SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose); 169 const char *memrecycle(const SEXP target, const SEXP where, const int start, const int len, SEXP source, const int sourceStart, const int sourceLen, const int colnum, const char *colname); 170 SEXP shallowwrapper(SEXP dt, SEXP cols); 171 172 SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, 173 SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts, 174 SEXP lens, SEXP jexp, SEXP env, SEXP lhs, SEXP newnames, 175 SEXP on, SEXP verbose); 176 177 // bmerge.c 178 SEXP bmerge(SEXP iArg, SEXP xArg, SEXP icolsArg, SEXP xcolsArg, SEXP isorted, 179 SEXP xoArg, SEXP rollarg, SEXP rollendsArg, SEXP nomatchArg, 180 SEXP multArg, SEXP opArg, SEXP nqgrpArg, SEXP nqmaxgrpArg); 181 182 // quickselect 183 double dquickselect(double *x, int n); 184 double iquickselect(int *x, int n); 185 double i64quickselect(int64_t *x, int n); 186 187 // fread.c 188 double wallclock(); 189 190 // openmp-utils.c 191 void initDTthreads(); 192 int getDTthreads(const int64_t n, const bool throttle); 193 void avoid_openmp_hang_within_fork(); 194 195 // froll.c 196 void frollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose); 197 void frollmeanFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose); 198 void frollmeanExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose); 199 void frollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose); 200 void frollsumFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose); 201 void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose); 202 void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, int align, double fill, SEXP call, SEXP rho, bool verbose); 203 204 // frolladaptive.c 205 void fadaptiverollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); 206 void fadaptiverollmeanFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); 207 void fadaptiverollmeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); 208 void fadaptiverollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); 209 void fadaptiverollsumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); 210 void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); 211 212 // frollR.c 213 SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEXP narm, SEXP hasNA, SEXP adaptive); 214 SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho); 215 216 // nafill.c 217 void nafillDouble(double *x, uint_fast64_t nx, unsigned int type, double fill, bool nan_is_na, ans_t *ans, bool verbose); 218 void nafillInteger(int32_t *x, uint_fast64_t nx, unsigned int type, int32_t fill, ans_t *ans, bool verbose); 219 SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, SEXP cols); 220 221 // between.c 222 SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP incbounds, SEXP NAbounds, SEXP check); 223 224 // coalesce.c 225 SEXP coalesce(SEXP x, SEXP inplace); 226 227 // utils.c 228 bool isRealReallyInt(SEXP x); 229 SEXP isReallyReal(SEXP x); 230 bool allNA(SEXP x, bool errorForBadType); 231 SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups); 232 void coerceFill(SEXP fill, double *dfill, int32_t *ifill, int64_t *i64fill); 233 SEXP coerceFillR(SEXP fill); 234 bool INHERITS(SEXP x, SEXP char_); 235 bool Rinherits(SEXP x, SEXP char_); 236 SEXP copyAsPlain(SEXP x); 237 void copySharedColumns(SEXP x); 238 SEXP lock(SEXP x); 239 SEXP unlock(SEXP x); 240 bool islocked(SEXP x); 241 SEXP islockedR(SEXP x); 242 bool need2utf8(SEXP x); 243 SEXP coerceUtf8IfNeeded(SEXP x); 244 245 // types.c 246 char *end(char *start); 247 void ansMsg(ans_t *ans, int n, bool verbose, const char *func); 248 SEXP testMsgR(SEXP status, SEXP x, SEXP k); 249 250 //fifelse.c 251 SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na); 252 SEXP fcaseR(SEXP na, SEXP rho, SEXP args); 253 254 //snprintf.c 255 int dt_win_snprintf(char *dest, size_t n, const char *fmt, ...); 256 257