1 #include <cpp11/list.hpp>
2 #include <cpp11/strings.hpp>
3
4 #include "DateTime.h"
5 #include "DateTimeParser.h"
6 #include "LocaleInfo.h"
7 #include "r_utils.h"
8
9 #include "vroom_dbl.h"
10 #include "vroom_lgl.h"
11 #include "vroom_num.h"
12
13 typedef bool (*canParseFun)(const std::string&, LocaleInfo* pLocale);
14
canParse(const cpp11::strings & x,const canParseFun & canParse,LocaleInfo * pLocale)15 bool canParse(
16 const cpp11::strings& x, const canParseFun& canParse, LocaleInfo* pLocale) {
17 for (auto&& i : x) {
18 if (i == NA_STRING)
19 continue;
20
21 if (i.size() == 0)
22 continue;
23
24 if (!canParse(std::string(i), pLocale))
25 return false;
26 }
27 return true;
28 }
29
allMissing(const cpp11::strings & x)30 bool allMissing(const cpp11::strings& x) {
31 for (auto&& i : x) {
32 if (i != NA_STRING && i.size() > 0)
33 return false;
34 }
35 return true;
36 }
37
isLogical(const std::string & x,LocaleInfo *)38 bool isLogical(const std::string& x, LocaleInfo* /* pLocale */) {
39 const char* const str = x.data();
40 int res = parse_logical(str, str + x.size());
41 return res != NA_LOGICAL;
42 }
43
isNumber(const std::string & x,LocaleInfo * pLocale)44 bool isNumber(const std::string& x, LocaleInfo* pLocale) {
45 // Leading zero not followed by decimal mark
46 if (x[0] == '0' && x.size() > 1 &&
47 !matches(x.data() + 1, x.data() + x.size(), pLocale->decimalMark_))
48 return false;
49
50 auto str = vroom::string(x);
51 auto num = parse_num(str.begin(), str.end(), *pLocale, true);
52
53 return !ISNA(num);
54 }
55
isInteger(const std::string & x,LocaleInfo *)56 bool isInteger(const std::string& x, LocaleInfo* /* pLocale */) {
57 // Leading zero
58 if (x[0] == '0' && x.size() > 1)
59 return false;
60
61 double res = 0;
62 std::string::const_iterator begin = x.begin(), end = x.end();
63
64 return parseInt(begin, end, res) && begin == end;
65 }
66
isDouble(const std::string & x,LocaleInfo * pLocale)67 bool isDouble(const std::string& x, LocaleInfo* pLocale) {
68 // Leading zero not followed by decimal mark
69 if (x[0] == '0' && x.size() > 1 && x[1] != pLocale->decimalMark_[0])
70 return false;
71
72 double res =
73 bsd_strtod(x.data(), x.data() + x.size(), pLocale->decimalMark_[0]);
74
75 return !ISNA(res);
76 }
77
isTime(const std::string & x,LocaleInfo * pLocale)78 bool isTime(const std::string& x, LocaleInfo* pLocale) {
79 DateTimeParser parser(pLocale);
80
81 parser.setDate(x.c_str(), x.c_str() + x.size());
82 return parser.parseLocaleTime();
83 }
84
isDate(const std::string & x,LocaleInfo * pLocale)85 bool isDate(const std::string& x, LocaleInfo* pLocale) {
86 DateTimeParser parser(pLocale);
87
88 parser.setDate(x.c_str(), x.c_str() + x.size());
89 return parser.parseLocaleDate();
90 }
91
isDateTime(const std::string & x,LocaleInfo * pLocale)92 static bool isDateTime(const std::string& x, LocaleInfo* pLocale) {
93 DateTimeParser parser(pLocale);
94
95 parser.setDate(x.c_str(), x.c_str() + x.size());
96 bool ok = parser.parseISO8601();
97
98 if (!ok)
99 return false;
100
101 DateTime dt = parser.makeDateTime();
102 return dt.validDateTime();
103 }
104
guess_type__(cpp11::writable::strings & input,const cpp11::strings & na,LocaleInfo * pLocale,bool guess_integer=false)105 std::string guess_type__(
106 cpp11::writable::strings& input,
107 const cpp11::strings& na,
108 LocaleInfo* pLocale,
109 bool guess_integer = false) {
110
111 if (input.size() == 0) {
112 return "character";
113 }
114
115 if (allMissing(input)) {
116 return "logical";
117 }
118
119 for (R_xlen_t i = 0; i < input.size(); ++i) {
120 for (R_xlen_t j = 0; j < na.size(); ++j) {
121 if (STRING_ELT(input, i) == STRING_ELT(na, j)) {
122 input[i] = NA_STRING;
123 break;
124 }
125 }
126 }
127
128 // Work from strictest to most flexible
129 if (canParse(input, isLogical, pLocale))
130 return "logical";
131 if (guess_integer && canParse(input, isInteger, pLocale))
132 return "integer";
133 if (canParse(input, isDouble, pLocale))
134 return "double";
135 if (canParse(input, isNumber, pLocale))
136 return "number";
137 if (canParse(input, isTime, pLocale))
138 return "time";
139 if (canParse(input, isDate, pLocale))
140 return "date";
141 if (canParse(input, isDateTime, pLocale))
142 return "datetime";
143
144 // Otherwise can always parse as a character
145 return "character";
146 }
147
guess_type_(cpp11::writable::strings input,const cpp11::strings & na,const cpp11::list & locale,bool guess_integer=false)148 [[cpp11::register]] std::string guess_type_(
149 cpp11::writable::strings input,
150 const cpp11::strings& na,
151 const cpp11::list& locale,
152 bool guess_integer = false) {
153 LocaleInfo locale_(locale);
154 return guess_type__(input, na, &locale_, guess_integer);
155 }
156