1 #ifndef FASTREAD_COLLECTOR_H_
2 #define FASTREAD_COLLECTOR_H_
3 #include "cpp11/doubles.hpp"
4 #include "cpp11/integers.hpp"
5 #include "cpp11/list.hpp"
6 #include "cpp11/logicals.hpp"
7 #include "cpp11/strings.hpp"
8 
9 #include "DateTime.h"
10 #include "DateTimeParser.h"
11 #include "Iconv.h"
12 #include "LocaleInfo.h"
13 #include "Token.h"
14 #include "Warnings.h"
15 #include <map>
16 #include <memory>
17 
// Forward declaration so the shared-pointer alias can be named before the
// class body is seen.
class Collector;

// Shared-ownership handle used wherever collectors are stored or passed.
using CollectorPtr = std::shared_ptr<Collector>;
20 
21 class Collector {
22 protected:
23   cpp11::sexp column_;
24   Warnings* pWarnings_;
25 
26   int n_;
27 
28 public:
29   Collector(SEXP column, Warnings* pWarnings = NULL)
column_(column)30       : column_(column), pWarnings_(pWarnings), n_(0) {}
31 
~Collector()32   virtual ~Collector(){};
33 
34   virtual void setValue(int i, const Token& t) = 0;
setValue(int i,const std::string & s)35   virtual void setValue(int i, const std::string& s){}; // nocov
setValue(int i,size_t st)36   virtual void setValue(int i, size_t st){};            // nocov
37 
vector()38   virtual cpp11::sexp vector() { return column_; };
39 
skip()40   virtual bool skip() { return false; }
41 
size()42   int size() { return n_; }
43 
resize(int n)44   void resize(int n) {
45     if (n == n_)
46       return;
47 
48     if (column_ == R_NilValue)
49       return;
50 
51 #if R_VERSION >= R_Version(3, 4, 0)
52     if (n > 0 && n < n_) {
53       SET_TRUELENGTH(column_, n_);
54       SETLENGTH(column_, n);
55       SET_GROWABLE_BIT(column_);
56     } else {
57       column_ = Rf_lengthgets(column_, n);
58     }
59 #else
60     column_ = Rf_lengthgets(column_, n);
61 #endif
62 
63     n_ = n;
64   }
65 
clear()66   void clear() { resize(0); }
67 
setWarnings(Warnings * pWarnings)68   void setWarnings(Warnings* pWarnings) { pWarnings_ = pWarnings; }
69 
warn(int row,int col,std::string expected,std::string actual)70   inline void warn(int row, int col, std::string expected, std::string actual) {
71     if (pWarnings_ == NULL) {
72       cpp11::warning(
73           "[%i, %i]: expected %s, but got '%s'",
74           row + 1,
75           col + 1,
76           expected.c_str(),
77           actual.c_str());
78       return;
79     }
80 
81     pWarnings_->addWarning(row, col, expected, actual);
82   }
83   inline void
warn(int row,int col,std::string expected,SourceIterators actual)84   warn(int row, int col, std::string expected, SourceIterators actual) {
85     warn(row, col, expected, std::string(actual.first, actual.second));
86   }
87 
88   static CollectorPtr create(const cpp11::list& spec, LocaleInfo* pLocale);
89 };
90 
91 // Character -------------------------------------------------------------------
92 
93 class CollectorCharacter : public Collector {
94   Iconv* pEncoder_;
95 
96 public:
CollectorCharacter(Iconv * pEncoder)97   CollectorCharacter(Iconv* pEncoder)
98       : Collector(cpp11::writable::strings(R_xlen_t(0))), pEncoder_(pEncoder) {}
99   void setValue(int i, const Token& t);
100   void setValue(int i, const std::string& s);
101 };
102 
103 // Date ------------------------------------------------------------------------
104 
105 class CollectorDate : public Collector {
106   std::string format_;
107   DateTimeParser parser_;
108 
109 public:
CollectorDate(LocaleInfo * pLocale,const std::string & format)110   CollectorDate(LocaleInfo* pLocale, const std::string& format)
111       : Collector(cpp11::writable::doubles(R_xlen_t(0))),
112         format_(format),
113         parser_(pLocale) {}
114 
115   void setValue(int i, const Token& t);
116 
vector()117   cpp11::sexp vector() {
118     column_.attr("class") = "Date";
119     return column_;
120   };
121 };
122 
123 // Date time -------------------------------------------------------------------
124 
125 class CollectorDateTime : public Collector {
126   std::string format_;
127   DateTimeParser parser_;
128   std::string tz_;
129 
130 public:
CollectorDateTime(LocaleInfo * pLocale,const std::string & format)131   CollectorDateTime(LocaleInfo* pLocale, const std::string& format)
132       : Collector(cpp11::writable::doubles(R_xlen_t(0))),
133         format_(format),
134         parser_(pLocale),
135         tz_(pLocale->tz_) {}
136 
137   void setValue(int i, const Token& t);
138 
vector()139   cpp11::sexp vector() {
140     column_.attr("class") = {"POSIXct", "POSIXt"};
141     column_.attr("tzone") = tz_;
142     return column_;
143   };
144 };
145 
146 class CollectorDouble : public Collector {
147   char decimalMark_;
148 
149 public:
CollectorDouble(char decimalMark)150   CollectorDouble(char decimalMark)
151       : Collector(cpp11::writable::doubles(R_xlen_t(0))),
152         decimalMark_(decimalMark) {}
153   void setValue(int i, const Token& t);
154   void setValue(int i, size_t st);
155 };
156 
157 class CollectorFactor : public Collector {
158   Iconv* pEncoder_;
159   std::vector<cpp11::r_string> levels_;
160   std::map<cpp11::r_string, int> levelset_;
161   bool ordered_, implicitLevels_, includeNa_;
162   std::string buffer_;
163 
164   void insert(int i, const cpp11::r_string& str, const Token& t);
165 
166 public:
CollectorFactor(Iconv * pEncoder,cpp11::sexp levels,bool ordered,bool includeNa)167   CollectorFactor(
168       Iconv* pEncoder, cpp11::sexp levels, bool ordered, bool includeNa)
169       : Collector(cpp11::writable::integers(R_xlen_t(0))),
170         pEncoder_(pEncoder),
171         ordered_(ordered),
172         includeNa_(includeNa) {
173     implicitLevels_ = levels == R_NilValue;
174     if (!implicitLevels_) {
175       cpp11::strings lvls(levels);
176       int n = lvls.size();
177 
178       for (int i = 0; i < n; ++i) {
179         cpp11::r_string std_level;
180         if (STRING_ELT(lvls, i) != NA_STRING) {
181           const char* level = Rf_translateCharUTF8(STRING_ELT(lvls, i));
182           std_level = level;
183         } else {
184           std_level = NA_STRING;
185         }
186         levels_.push_back(std_level);
187         levelset_.insert(std::make_pair(std_level, i));
188       }
189     }
190   }
191   void setValue(int i, const Token& t);
192 
vector()193   cpp11::sexp vector() {
194     if (ordered_) {
195       column_.attr("class") = {"ordered", "factor"};
196     } else {
197       column_.attr("class") = "factor";
198     }
199 
200     int n = levels_.size();
201     cpp11::writable::strings levels(n);
202     for (int i = 0; i < n; ++i) {
203       levels[i] = levels_[i];
204     }
205 
206     column_.attr("levels") = levels;
207     return column_;
208   };
209 };
210 
211 class CollectorInteger : public Collector {
212 public:
CollectorInteger()213   CollectorInteger() : Collector(cpp11::writable::integers(R_xlen_t(0))) {}
214   void setValue(int i, const Token& t);
215 };
216 
217 class CollectorLogical : public Collector {
218 public:
CollectorLogical()219   CollectorLogical() : Collector(cpp11::writable::logicals(R_xlen_t(0))) {}
220   void setValue(int i, const Token& t);
221 };
222 
223 class CollectorNumeric : public Collector {
224   char decimalMark_, groupingMark_;
225 
226 public:
CollectorNumeric(char decimalMark,char groupingMark)227   CollectorNumeric(char decimalMark, char groupingMark)
228       : Collector(cpp11::writable::doubles(R_xlen_t(0))),
229         decimalMark_(decimalMark),
230         groupingMark_(groupingMark) {}
231   void setValue(int i, const Token& t);
232   bool isNum(char c);
233 };
234 
235 // Time ---------------------------------------------------------------------
236 
237 class CollectorTime : public Collector {
238   std::string format_;
239   DateTimeParser parser_;
240 
241 public:
CollectorTime(LocaleInfo * pLocale,const std::string & format)242   CollectorTime(LocaleInfo* pLocale, const std::string& format)
243       : Collector(cpp11::writable::doubles(R_xlen_t(0))),
244         format_(format),
245         parser_(pLocale) {}
246 
247   void setValue(int i, const Token& t);
248 
vector()249   cpp11::sexp vector() {
250     column_.attr("class") = {"hms", "difftime"};
251     column_.attr("units") = "secs";
252     return column_;
253   };
254 };
255 
256 // Skip ---------------------------------------------------------------------
257 
258 class CollectorSkip : public Collector {
259 public:
CollectorSkip()260   CollectorSkip() : Collector(R_NilValue) {}
setValue(int i,const Token & t)261   void setValue(int i, const Token& t) {}
skip()262   bool skip() { return true; }
263 };
264 
265 // Raw -------------------------------------------------------------------------
266 class CollectorRaw : public Collector {
267 public:
CollectorRaw()268   CollectorRaw() : Collector(cpp11::writable::list(static_cast<R_xlen_t>(0))) {}
269   void setValue(int i, const Token& t);
270 };
271 
272 // Helpers ---------------------------------------------------------------------
273 
// Free functions operating on groups of collectors. Only the declarations
// are visible here; the definitions live elsewhere.

// Returns a vector of collectors for `specs` using `pLocale`.
// NOTE(review): presumably one collector per entry of `specs` — confirm.
std::vector<CollectorPtr>
collectorsCreate(const cpp11::list& specs, LocaleInfo* pLocale);
// NOTE(review): presumably calls resize(n) on every collector — confirm.
void collectorsResize(std::vector<CollectorPtr>& collectors, int n);
// NOTE(review): presumably calls clear() on every collector — confirm.
void collectorsClear(std::vector<CollectorPtr>& collectors);
// Returns a string derived from `input` under the locale list `locale_`;
// `guessInteger` defaults to false.
// NOTE(review): presumably the name of the guessed column type — confirm.
std::string collectorGuess(
    const cpp11::strings& input,
    const cpp11::list& locale_,
    bool guessInteger = false);
282 
283 #endif
284