1 #include <cpp11/list.hpp>
2 #include <cpp11/strings.hpp>
3 
4 #include "DateTime.h"
5 #include "DateTimeParser.h"
6 #include "LocaleInfo.h"
7 #include "r_utils.h"
8 
9 #include "vroom_dbl.h"
10 #include "vroom_lgl.h"
11 #include "vroom_num.h"
12 
13 typedef bool (*canParseFun)(const std::string&, LocaleInfo* pLocale);
14 
canParse(const cpp11::strings & x,const canParseFun & canParse,LocaleInfo * pLocale)15 bool canParse(
16     const cpp11::strings& x, const canParseFun& canParse, LocaleInfo* pLocale) {
17   for (auto&& i : x) {
18     if (i == NA_STRING)
19       continue;
20 
21     if (i.size() == 0)
22       continue;
23 
24     if (!canParse(std::string(i), pLocale))
25       return false;
26   }
27   return true;
28 }
29 
allMissing(const cpp11::strings & x)30 bool allMissing(const cpp11::strings& x) {
31   for (auto&& i : x) {
32     if (i != NA_STRING && i.size() > 0)
33       return false;
34   }
35   return true;
36 }
37 
isLogical(const std::string & x,LocaleInfo *)38 bool isLogical(const std::string& x, LocaleInfo* /* pLocale */) {
39   const char* const str = x.data();
40   int res = parse_logical(str, str + x.size());
41   return res != NA_LOGICAL;
42 }
43 
isNumber(const std::string & x,LocaleInfo * pLocale)44 bool isNumber(const std::string& x, LocaleInfo* pLocale) {
45   // Leading zero not followed by decimal mark
46   if (x[0] == '0' && x.size() > 1 &&
47       !matches(x.data() + 1, x.data() + x.size(), pLocale->decimalMark_))
48     return false;
49 
50   auto str = vroom::string(x);
51   auto num = parse_num(str.begin(), str.end(), *pLocale, true);
52 
53   return !ISNA(num);
54 }
55 
isInteger(const std::string & x,LocaleInfo *)56 bool isInteger(const std::string& x, LocaleInfo* /* pLocale */) {
57   // Leading zero
58   if (x[0] == '0' && x.size() > 1)
59     return false;
60 
61   double res = 0;
62   std::string::const_iterator begin = x.begin(), end = x.end();
63 
64   return parseInt(begin, end, res) && begin == end;
65 }
66 
isDouble(const std::string & x,LocaleInfo * pLocale)67 bool isDouble(const std::string& x, LocaleInfo* pLocale) {
68   // Leading zero not followed by decimal mark
69   if (x[0] == '0' && x.size() > 1 && x[1] != pLocale->decimalMark_[0])
70     return false;
71 
72   double res =
73       bsd_strtod(x.data(), x.data() + x.size(), pLocale->decimalMark_[0]);
74 
75   return !ISNA(res);
76 }
77 
isTime(const std::string & x,LocaleInfo * pLocale)78 bool isTime(const std::string& x, LocaleInfo* pLocale) {
79   DateTimeParser parser(pLocale);
80 
81   parser.setDate(x.c_str(), x.c_str() + x.size());
82   return parser.parseLocaleTime();
83 }
84 
isDate(const std::string & x,LocaleInfo * pLocale)85 bool isDate(const std::string& x, LocaleInfo* pLocale) {
86   DateTimeParser parser(pLocale);
87 
88   parser.setDate(x.c_str(), x.c_str() + x.size());
89   return parser.parseLocaleDate();
90 }
91 
isDateTime(const std::string & x,LocaleInfo * pLocale)92 static bool isDateTime(const std::string& x, LocaleInfo* pLocale) {
93   DateTimeParser parser(pLocale);
94 
95   parser.setDate(x.c_str(), x.c_str() + x.size());
96   bool ok = parser.parseISO8601();
97 
98   if (!ok)
99     return false;
100 
101   DateTime dt = parser.makeDateTime();
102   return dt.validDateTime();
103 }
104 
guess_type__(cpp11::writable::strings & input,const cpp11::strings & na,LocaleInfo * pLocale,bool guess_integer=false)105 std::string guess_type__(
106     cpp11::writable::strings& input,
107     const cpp11::strings& na,
108     LocaleInfo* pLocale,
109     bool guess_integer = false) {
110 
111   if (input.size() == 0) {
112     return "character";
113   }
114 
115   if (allMissing(input)) {
116     return "logical";
117   }
118 
119   for (R_xlen_t i = 0; i < input.size(); ++i) {
120     for (R_xlen_t j = 0; j < na.size(); ++j) {
121       if (STRING_ELT(input, i) == STRING_ELT(na, j)) {
122         input[i] = NA_STRING;
123         break;
124       }
125     }
126   }
127 
128   // Work from strictest to most flexible
129   if (canParse(input, isLogical, pLocale))
130     return "logical";
131   if (guess_integer && canParse(input, isInteger, pLocale))
132     return "integer";
133   if (canParse(input, isDouble, pLocale))
134     return "double";
135   if (canParse(input, isNumber, pLocale))
136     return "number";
137   if (canParse(input, isTime, pLocale))
138     return "time";
139   if (canParse(input, isDate, pLocale))
140     return "date";
141   if (canParse(input, isDateTime, pLocale))
142     return "datetime";
143 
144   // Otherwise can always parse as a character
145   return "character";
146 }
147 
guess_type_(cpp11::writable::strings input,const cpp11::strings & na,const cpp11::list & locale,bool guess_integer=false)148 [[cpp11::register]] std::string guess_type_(
149     cpp11::writable::strings input,
150     const cpp11::strings& na,
151     const cpp11::list& locale,
152     bool guess_integer = false) {
153   LocaleInfo locale_(locale);
154   return guess_type__(input, na, &locale_, guess_integer);
155 }
156