1 #include "myomp.h"     // first for clang-13-omp, #5122
2 #include "dt_stdio.h"  // PRId64 and PRIu64
3 #include <R.h>
4 #include <Rversion.h>
5 #if !defined(R_VERSION) || R_VERSION < R_Version(3, 5, 0)  // R-exts$6.14
6 #  define ALTREP(x) 0     // #2866
7 #  define USE_RINTERNALS  // #3301
8 #  define DATAPTR_RO(x) ((const void *)DATAPTR(x))
9 #endif
10 #include <Rinternals.h>
11 #define SEXPPTR_RO(x) ((const SEXP *)DATAPTR_RO(x))  // to avoid overhead of looped STRING_ELT and VECTOR_ELT
12 #include <stdint.h>    // for uint64_t rather than unsigned long long
13 #include <stdbool.h>
14 #include "types.h"
15 #include "po.h"
16 #ifdef WIN32  // positional specifiers (%n$) used in translations; #4402
17 #  define snprintf dt_win_snprintf  // see our snprintf.c; tried and failed to link to _sprintf_p on Windows
18 #endif
19 #ifdef sprintf
20 #undef sprintf
21 #endif
22 #define sprintf USE_SNPRINTF_NOT_SPRINTF  // prevent use of sprintf in data.table source; force us to use n always
23 
24 // #include <signal.h> // the debugging machinery + breakpoint aidee
25 // raise(SIGINT);
26 
27 #define IS_UTF8(x)  (LEVELS(x) & 8)
28 #define IS_ASCII(x) (LEVELS(x) & 64)
29 #define IS_LATIN(x) (LEVELS(x) & 4)
30 #define IS_TRUE(x)  (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]==TRUE)
31 #define IS_FALSE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]==FALSE)
32 #define IS_TRUE_OR_FALSE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]!=NA_LOGICAL)
33 
34 #define SIZEOF(x) __sizes[TYPEOF(x)]
35 #define TYPEORDER(x) __typeorder[x]
36 
37 #ifdef MIN
38 #  undef MIN
39 #endif
40 #define MIN(a,b) (((a)<(b))?(a):(b))
41 
42 #ifdef MAX
43 #  undef MAX
44 #endif
45 #define MAX(a,b) (((a)>(b))?(a):(b))
46 
47 // for use with bit64::integer64
48 #define NA_INTEGER64  INT64_MIN
49 #define MAX_INTEGER64 INT64_MAX
50 
51 // for use with CPLXSXP, no macro provided by R internals
52 #define ISNAN_COMPLEX(x) (ISNAN((x).r) || ISNAN((x).i)) // TRUE if either real or imaginary component is NA or NaN
53 
54 // Backport macros added to R in 2017 so we don't need to update dependency from R 3.0.0
55 #ifndef MAYBE_SHARED
56 #  define MAYBE_SHARED(x) (NAMED(x) > 1)
57 #endif
58 #ifndef MAYBE_REFERENCED
59 #  define MAYBE_REFERENCED(x) ( NAMED(x) > 0 )
60 #endif
61 
62 // If we find a non-ASCII, non-NA, non-UTF8 encoding, we try to convert it to UTF8. That is, marked non-ascii/non-UTF8 encodings will
63 // always be checked in UTF8 locale. This seems to be the best fix Arun could think of to put the encoding issues to rest.
64 // Since the if-statement will fail with the first condition check in "normal" ASCII cases, there shouldn't be huge penalty issues in
65 // most cases. Fix for #66, #69, #469 and #1293
66 // TODO: compare 1.9.6 performance with 1.9.7 with huge number of ASCII strings, and again after Jan 2018 when made macro.
67 // Matt moved this to be macro in Jan 2018 so that branch can benefit from branch prediction too wherever used inside loops.
68 // This IS_ASCII will dereference s and that cache fetch is the part that may bite more than the branch, though. Without a call to
69 // to ENC2UTF as all, the pointer value can just be compared by the calling code without deferencing it. It may still be worth
70 // timing the impact and manually avoiding (is there an IS_ASCII on the character vector rather than testing each item every time?)
71 #define NEED2UTF8(s) !(IS_ASCII(s) || (s)==NA_STRING || IS_UTF8(s))
72 #define ENC2UTF8(s) (!NEED2UTF8(s) ? (s) : mkCharCE(translateCharUTF8(s), CE_UTF8))
73 
74 // init.c
75 extern SEXP char_integer64;
76 extern SEXP char_ITime;
77 extern SEXP char_IDate;
78 extern SEXP char_Date;
79 extern SEXP char_POSIXct;
80 extern SEXP char_POSIXt;
81 extern SEXP char_UTC;
82 extern SEXP char_nanotime;
83 extern SEXP char_lens;
84 extern SEXP char_indices;
85 extern SEXP char_allLen1;
86 extern SEXP char_allGrp1;
87 extern SEXP char_factor;
88 extern SEXP char_ordered;
89 extern SEXP char_datatable;
90 extern SEXP char_dataframe;
91 extern SEXP char_NULL;
92 extern SEXP sym_sorted;
93 extern SEXP sym_index;
94 extern SEXP sym_BY;
95 extern SEXP sym_starts, char_starts;
96 extern SEXP sym_maxgrpn;
97 extern SEXP sym_colClassesAs;
98 extern SEXP sym_verbose;
99 extern SEXP SelfRefSymbol;
100 extern SEXP sym_inherits;
101 extern SEXP sym_datatable_locked;
102 extern SEXP sym_tzone;
103 extern SEXP sym_old_fread_datetime_character;
104 extern double NA_INT64_D;
105 extern long long NA_INT64_LL;
106 extern Rcomplex NA_CPLX;  // initialized in init.c; see there for comments
107 extern size_t __sizes[100];     // max appears to be FUNSXP = 99, see Rinternals.h
108 extern size_t __typeorder[100]; // __ prefix otherwise if we use these names directly, the SIZEOF define ends up using the local one
109 
110 long long DtoLL(double x);
111 double LLtoD(long long x);
112 bool GetVerbose();
113 
114 // cj.c
115 SEXP cj(SEXP base_list);
116 
117 // dogroups.c
118 SEXP keepattr(SEXP to, SEXP from);
119 SEXP growVector(SEXP x, R_len_t newlen);
120 
121 // assign.c
122 SEXP allocNAVector(SEXPTYPE type, R_len_t n);
123 SEXP allocNAVectorLike(SEXP x, R_len_t n);
124 void writeNA(SEXP v, const int from, const int n);
125 void savetl_init(), savetl(SEXP s), savetl_end();
126 int checkOverAlloc(SEXP x);
127 
128 // forder.c
129 int StrCmp(SEXP x, SEXP y);
130 uint64_t dtwiddle(double x);
131 SEXP forder(SEXP DT, SEXP by, SEXP retGrp, SEXP sortStrArg, SEXP orderArg, SEXP naArg);
132 int getNumericRounding_C();
133 
134 // reorder.c
135 SEXP reorder(SEXP x, SEXP order);
136 SEXP setcolorder(SEXP x, SEXP o);
137 
138 // subset.c
139 void subsetVectorRaw(SEXP ans, SEXP source, SEXP idx, const bool anyNA);
140 SEXP subsetVector(SEXP x, SEXP idx);
141 
142 // fcast.c
143 SEXP int_vec_init(R_len_t n, int val);
144 
145 // vecseq.c
146 SEXP vecseq(SEXP x, SEXP len, SEXP clamp);
147 
148 // uniqlist.c
149 SEXP uniqlist(SEXP l, SEXP order);
150 SEXP uniqlengths(SEXP x, SEXP n);
151 
152 // chmatch.c
153 SEXP chmatch(SEXP x, SEXP table, int nomatch);
154 SEXP chin(SEXP x, SEXP table);
155 
156 SEXP isOrderedSubset(SEXP, SEXP);
157 void setselfref(SEXP);
158 
159 // fmelt.c
160 SEXP seq_int(int n, int start);
161 SEXP set_diff(SEXP x, int n);
162 SEXP which(SEXP x, Rboolean val);
163 
164 // frank.c
165 SEXP dt_na(SEXP x, SEXP cols);
166 
167 // assign.c
168 SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose);
169 const char *memrecycle(const SEXP target, const SEXP where, const int start, const int len, SEXP source, const int sourceStart, const int sourceLen, const int colnum, const char *colname);
170 SEXP shallowwrapper(SEXP dt, SEXP cols);
171 
172 SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols,
173                 SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts,
174                 SEXP lens, SEXP jexp, SEXP env, SEXP lhs, SEXP newnames,
175                 SEXP on, SEXP verbose);
176 
177 // bmerge.c
178 SEXP bmerge(SEXP iArg, SEXP xArg, SEXP icolsArg, SEXP xcolsArg, SEXP isorted,
179                 SEXP xoArg, SEXP rollarg, SEXP rollendsArg, SEXP nomatchArg,
180                 SEXP multArg, SEXP opArg, SEXP nqgrpArg, SEXP nqmaxgrpArg);
181 
182 // quickselect
183 double dquickselect(double *x, int n);
184 double iquickselect(int *x, int n);
185 double i64quickselect(int64_t *x, int n);
186 
187 // fread.c
188 double wallclock();
189 
190 // openmp-utils.c
191 void initDTthreads();
192 int getDTthreads(const int64_t n, const bool throttle);
193 void avoid_openmp_hang_within_fork();
194 
195 // froll.c
196 void frollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose);
197 void frollmeanFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
198 void frollmeanExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
199 void frollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose);
200 void frollsumFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
201 void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
202 void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, int align, double fill, SEXP call, SEXP rho, bool verbose);
203 
204 // frolladaptive.c
205 void fadaptiverollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
206 void fadaptiverollmeanFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
207 void fadaptiverollmeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
208 void fadaptiverollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
209 void fadaptiverollsumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
210 void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
211 
212 // frollR.c
213 SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEXP narm, SEXP hasNA, SEXP adaptive);
214 SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho);
215 
216 // nafill.c
217 void nafillDouble(double *x, uint_fast64_t nx, unsigned int type, double fill, bool nan_is_na, ans_t *ans, bool verbose);
218 void nafillInteger(int32_t *x, uint_fast64_t nx, unsigned int type, int32_t fill, ans_t *ans, bool verbose);
219 SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, SEXP cols);
220 
221 // between.c
222 SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP incbounds, SEXP NAbounds, SEXP check);
223 
224 // coalesce.c
225 SEXP coalesce(SEXP x, SEXP inplace);
226 
227 // utils.c
228 bool isRealReallyInt(SEXP x);
229 SEXP isReallyReal(SEXP x);
230 bool allNA(SEXP x, bool errorForBadType);
231 SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups);
232 void coerceFill(SEXP fill, double *dfill, int32_t *ifill, int64_t *i64fill);
233 SEXP coerceFillR(SEXP fill);
234 bool INHERITS(SEXP x, SEXP char_);
235 bool Rinherits(SEXP x, SEXP char_);
236 SEXP copyAsPlain(SEXP x);
237 void copySharedColumns(SEXP x);
238 SEXP lock(SEXP x);
239 SEXP unlock(SEXP x);
240 bool islocked(SEXP x);
241 SEXP islockedR(SEXP x);
242 bool need2utf8(SEXP x);
243 SEXP coerceUtf8IfNeeded(SEXP x);
244 
245 // types.c
246 char *end(char *start);
247 void ansMsg(ans_t *ans, int n, bool verbose, const char *func);
248 SEXP testMsgR(SEXP status, SEXP x, SEXP k);
249 
250 //fifelse.c
251 SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na);
252 SEXP fcaseR(SEXP na, SEXP rho, SEXP args);
253 
254 //snprintf.c
255 int dt_win_snprintf(char *dest, size_t n, const char *fmt, ...);
256 
257