1 #ifndef FASTREAD_COLLECTOR_H_ 2 #define FASTREAD_COLLECTOR_H_ 3 #include "cpp11/doubles.hpp" 4 #include "cpp11/integers.hpp" 5 #include "cpp11/list.hpp" 6 #include "cpp11/logicals.hpp" 7 #include "cpp11/strings.hpp" 8 9 #include "DateTime.h" 10 #include "DateTimeParser.h" 11 #include "Iconv.h" 12 #include "LocaleInfo.h" 13 #include "Token.h" 14 #include "Warnings.h" 15 #include <map> 16 #include <memory> 17 18 class Collector; 19 typedef std::shared_ptr<Collector> CollectorPtr; 20 21 class Collector { 22 protected: 23 cpp11::sexp column_; 24 Warnings* pWarnings_; 25 26 int n_; 27 28 public: 29 Collector(SEXP column, Warnings* pWarnings = NULL) column_(column)30 : column_(column), pWarnings_(pWarnings), n_(0) {} 31 ~Collector()32 virtual ~Collector(){}; 33 34 virtual void setValue(int i, const Token& t) = 0; setValue(int i,const std::string & s)35 virtual void setValue(int i, const std::string& s){}; // nocov setValue(int i,size_t st)36 virtual void setValue(int i, size_t st){}; // nocov 37 vector()38 virtual cpp11::sexp vector() { return column_; }; 39 skip()40 virtual bool skip() { return false; } 41 size()42 int size() { return n_; } 43 resize(int n)44 void resize(int n) { 45 if (n == n_) 46 return; 47 48 if (column_ == R_NilValue) 49 return; 50 51 #if R_VERSION >= R_Version(3, 4, 0) 52 if (n > 0 && n < n_) { 53 SET_TRUELENGTH(column_, n_); 54 SETLENGTH(column_, n); 55 SET_GROWABLE_BIT(column_); 56 } else { 57 column_ = Rf_lengthgets(column_, n); 58 } 59 #else 60 column_ = Rf_lengthgets(column_, n); 61 #endif 62 63 n_ = n; 64 } 65 clear()66 void clear() { resize(0); } 67 setWarnings(Warnings * pWarnings)68 void setWarnings(Warnings* pWarnings) { pWarnings_ = pWarnings; } 69 warn(int row,int col,std::string expected,std::string actual)70 inline void warn(int row, int col, std::string expected, std::string actual) { 71 if (pWarnings_ == NULL) { 72 cpp11::warning( 73 "[%i, %i]: expected %s, but got '%s'", 74 row + 1, 75 col + 1, 76 expected.c_str(), 77 actual.c_str()); 78 return; 79 } 80 81 pWarnings_->addWarning(row, col, expected, actual); 82 } 83 inline void warn(int row,int col,std::string expected,SourceIterators actual)84 warn(int row, int col, std::string expected, SourceIterators actual) { 85 warn(row, col, expected, std::string(actual.first, actual.second)); 86 } 87 88 static CollectorPtr create(const cpp11::list& spec, LocaleInfo* pLocale); 89 }; 90 91 // Character ------------------------------------------------------------------- 92 93 class CollectorCharacter : public Collector { 94 Iconv* pEncoder_; 95 96 public: CollectorCharacter(Iconv * pEncoder)97 CollectorCharacter(Iconv* pEncoder) 98 : Collector(cpp11::writable::strings(R_xlen_t(0))), pEncoder_(pEncoder) {} 99 void setValue(int i, const Token& t); 100 void setValue(int i, const std::string& s); 101 }; 102 103 // Date ------------------------------------------------------------------------ 104 105 class CollectorDate : public Collector { 106 std::string format_; 107 DateTimeParser parser_; 108 109 public: CollectorDate(LocaleInfo * pLocale,const std::string & format)110 CollectorDate(LocaleInfo* pLocale, const std::string& format) 111 : Collector(cpp11::writable::doubles(R_xlen_t(0))), 112 format_(format), 113 parser_(pLocale) {} 114 115 void setValue(int i, const Token& t); 116 vector()117 cpp11::sexp vector() { 118 column_.attr("class") = "Date"; 119 return column_; 120 }; 121 }; 122 123 // Date time ------------------------------------------------------------------- 124 125 class CollectorDateTime : public Collector { 126 std::string format_; 127 DateTimeParser parser_; 128 std::string tz_; 129 130 public: CollectorDateTime(LocaleInfo * pLocale,const std::string & format)131 CollectorDateTime(LocaleInfo* pLocale, const std::string& format) 132 : Collector(cpp11::writable::doubles(R_xlen_t(0))), 133 format_(format), 134 parser_(pLocale), 135 tz_(pLocale->tz_) {} 136 137 void setValue(int i, const Token& t); 138 vector()139 cpp11::sexp vector() { 140 column_.attr("class") = {"POSIXct", "POSIXt"}; 141 column_.attr("tzone") = tz_; 142 return column_; 143 }; 144 }; 145 146 class CollectorDouble : public Collector { 147 char decimalMark_; 148 149 public: CollectorDouble(char decimalMark)150 CollectorDouble(char decimalMark) 151 : Collector(cpp11::writable::doubles(R_xlen_t(0))), 152 decimalMark_(decimalMark) {} 153 void setValue(int i, const Token& t); 154 void setValue(int i, size_t st); 155 }; 156 157 class CollectorFactor : public Collector { 158 Iconv* pEncoder_; 159 std::vector<cpp11::r_string> levels_; 160 std::map<cpp11::r_string, int> levelset_; 161 bool ordered_, implicitLevels_, includeNa_; 162 std::string buffer_; 163 164 void insert(int i, const cpp11::r_string& str, const Token& t); 165 166 public: CollectorFactor(Iconv * pEncoder,cpp11::sexp levels,bool ordered,bool includeNa)167 CollectorFactor( 168 Iconv* pEncoder, cpp11::sexp levels, bool ordered, bool includeNa) 169 : Collector(cpp11::writable::integers(R_xlen_t(0))), 170 pEncoder_(pEncoder), 171 ordered_(ordered), 172 includeNa_(includeNa) { 173 implicitLevels_ = levels == R_NilValue; 174 if (!implicitLevels_) { 175 cpp11::strings lvls(levels); 176 int n = lvls.size(); 177 178 for (int i = 0; i < n; ++i) { 179 cpp11::r_string std_level; 180 if (STRING_ELT(lvls, i) != NA_STRING) { 181 const char* level = Rf_translateCharUTF8(STRING_ELT(lvls, i)); 182 std_level = level; 183 } else { 184 std_level = NA_STRING; 185 } 186 levels_.push_back(std_level); 187 levelset_.insert(std::make_pair(std_level, i)); 188 } 189 } 190 } 191 void setValue(int i, const Token& t); 192 vector()193 cpp11::sexp vector() { 194 if (ordered_) { 195 column_.attr("class") = {"ordered", "factor"}; 196 } else { 197 column_.attr("class") = "factor"; 198 } 199 200 int n = levels_.size(); 201 cpp11::writable::strings levels(n); 202 for (int i = 0; i < n; ++i) { 203 levels[i] = levels_[i]; 204 } 205 206 column_.attr("levels") = levels; 207 return column_; 208 }; 209 }; 210 211 class CollectorInteger : public Collector { 212 public: CollectorInteger()213 CollectorInteger() : Collector(cpp11::writable::integers(R_xlen_t(0))) {} 214 void setValue(int i, const Token& t); 215 }; 216 217 class CollectorLogical : public Collector { 218 public: CollectorLogical()219 CollectorLogical() : Collector(cpp11::writable::logicals(R_xlen_t(0))) {} 220 void setValue(int i, const Token& t); 221 }; 222 223 class CollectorNumeric : public Collector { 224 char decimalMark_, groupingMark_; 225 226 public: CollectorNumeric(char decimalMark,char groupingMark)227 CollectorNumeric(char decimalMark, char groupingMark) 228 : Collector(cpp11::writable::doubles(R_xlen_t(0))), 229 decimalMark_(decimalMark), 230 groupingMark_(groupingMark) {} 231 void setValue(int i, const Token& t); 232 bool isNum(char c); 233 }; 234 235 // Time --------------------------------------------------------------------- 236 237 class CollectorTime : public Collector { 238 std::string format_; 239 DateTimeParser parser_; 240 241 public: CollectorTime(LocaleInfo * pLocale,const std::string & format)242 CollectorTime(LocaleInfo* pLocale, const std::string& format) 243 : Collector(cpp11::writable::doubles(R_xlen_t(0))), 244 format_(format), 245 parser_(pLocale) {} 246 247 void setValue(int i, const Token& t); 248 vector()249 cpp11::sexp vector() { 250 column_.attr("class") = {"hms", "difftime"}; 251 column_.attr("units") = "secs"; 252 return column_; 253 }; 254 }; 255 256 // Skip --------------------------------------------------------------------- 257 258 class CollectorSkip : public Collector { 259 public: CollectorSkip()260 CollectorSkip() : Collector(R_NilValue) {} setValue(int i,const Token & t)261 void setValue(int i, const Token& t) {} skip()262 bool skip() { return true; } 263 }; 264 265 // Raw ------------------------------------------------------------------------- 266 class CollectorRaw : public Collector { 267 public: CollectorRaw()268 CollectorRaw() : Collector(cpp11::writable::list(static_cast<R_xlen_t>(0))) {} 269 void setValue(int i, const Token& t); 270 }; 271 272 // Helpers --------------------------------------------------------------------- 273 274 std::vector<CollectorPtr> 275 collectorsCreate(const cpp11::list& specs, LocaleInfo* pLocale); 276 void collectorsResize(std::vector<CollectorPtr>& collectors, int n); 277 void collectorsClear(std::vector<CollectorPtr>& collectors); 278 std::string collectorGuess( 279 const cpp11::strings& input, 280 const cpp11::list& locale_, 281 bool guessInteger = false); 282 283 #endif 284