#include "cpp11/list.hpp"
#include "cpp11/sexp.hpp"
#include "cpp11/strings.hpp"

#include "connection.h"
#include "grisu3.h"
#include <array>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <sstream>
#include <string>
10 
// How a double quote character inside a field is written out.
enum quote_escape_t {
  DOUBLE = 1,    // written as two double quotes: ""
  BACKSLASH = 2, // written as backslash + double quote: \"
  NONE = 3       // written unchanged: "
};
12 
13 void stream_delim(
14     const cpp11::sexp& connection,
15     const cpp11::sexp& x,
16     int i,
17     char delim,
18     const std::string& na,
19     quote_escape_t escape);
20 
stream_delim_row(const cpp11::sexp & connection,const cpp11::list & x,int i,char delim,const std::string & na,quote_escape_t escape,const char * eol)21 void stream_delim_row(
22     const cpp11::sexp& connection,
23     const cpp11::list& x,
24     int i,
25     char delim,
26     const std::string& na,
27     quote_escape_t escape,
28     const char* eol) {
29   int p = Rf_length(x);
30 
31   for (int j = 0; j < p - 1; ++j) {
32     stream_delim(connection, x.at(j), i, delim, na, escape);
33     write_bytes(connection, &delim, 1);
34   }
35   stream_delim(connection, x.at(p - 1), i, delim, na, escape);
36 
37   write_bytes(connection, eol, strlen(eol));
38 }
39 
// Decide whether `string` has to be wrapped in double quotes when written.
//
// Returns true when the text is identical to the missing-value marker `na`,
// or when it contains a line feed, a carriage return, a double quote or the
// field delimiter `delim`. Returns false otherwise.
bool needs_quote(const char* string, char delim, const std::string& na) {
  if (na == string) {
    return true;
  }

  const char* p = string;
  while (*p != '\0') {
    const char c = *p++;
    const bool special = c == '\n' || c == '\r' || c == '"' || c == delim;
    if (special) {
      return true;
    }
  }

  return false;
}
53 
stream_delim(const cpp11::sexp & connection,const char * string,char delim,const std::string & na,quote_escape_t escape)54 void stream_delim(
55     const cpp11::sexp& connection,
56     const char* string,
57     char delim,
58     const std::string& na,
59     quote_escape_t escape) {
60   bool quotes = needs_quote(string, delim, na);
61 
62   if (quotes) {
63     write_bytes(connection, "\"", 1);
64   }
65 
66   for (const char* cur = string; *cur != '\0'; ++cur) {
67     switch (*cur) {
68     case '"':
69       switch (escape) {
70       case DOUBLE:
71         write_bytes(connection, "\"\"", 2);
72         break;
73       case BACKSLASH:
74         write_bytes(connection, "\\\"", 2);
75         break;
76       case NONE:
77         write_bytes(connection, "\"", 1);
78         break;
79       }
80       break;
81     default:
82       write_bytes(connection, cur, 1);
83     }
84   }
85 
86   if (quotes) {
87     write_bytes(connection, "\"", 1);
88   }
89 }
90 
validate_col_type(SEXP x,const std::string & name)91 void validate_col_type(SEXP x, const std::string& name) {
92   switch (TYPEOF(x)) {
93   case LGLSXP:
94   case INTSXP:
95   case REALSXP:
96   case STRSXP:
97     break;
98   default:
99     cpp11::stop(
100         "Don't know how to handle vector of type %s in column '%s'.",
101         Rf_type2char(TYPEOF(x)),
102         name.c_str());
103   }
104 }
105 
stream_delim(const cpp11::sexp & connection,const cpp11::list & df,char delim,const std::string & na,bool col_names,bool bom,quote_escape_t escape,const char * eol)106 void stream_delim(
107     const cpp11::sexp& connection,
108     const cpp11::list& df,
109     char delim,
110     const std::string& na,
111     bool col_names,
112     bool bom,
113     quote_escape_t escape,
114     const char* eol) {
115   int p = Rf_length(df);
116   if (p == 0) {
117     return;
118   }
119 
120   if (bom) {
121     write_bytes(connection, "\xEF\xBB\xBF", 3);
122   }
123 
124   cpp11::strings names(df.attr("names"));
125   // Validate column types
126   for (int j = 0; j < p; ++j) {
127     validate_col_type(df.at(j), names[j]);
128   }
129 
130   if (col_names) {
131     cpp11::strings names(df.attr("names"));
132     for (int j = 0; j < p; ++j) {
133       stream_delim(connection, names, j, delim, na, escape);
134       if (j != p - 1) {
135         write_bytes(connection, &delim, 1);
136       }
137     }
138     write_bytes(connection, eol, strlen(eol));
139   }
140 
141   cpp11::sexp first_col = df[0];
142   int n = Rf_length(first_col);
143 
144   for (int i = 0; i < n; ++i) {
145     stream_delim_row(connection, df, i, delim, na, escape, eol);
146   }
147 }
148 
stream_delim_(const cpp11::list & df,const cpp11::sexp & connection,char delim,const std::string & na,bool col_names,bool bom,int quote_escape,const char * eol)149 [[cpp11::register]] void stream_delim_(
150     const cpp11::list& df,
151     const cpp11::sexp& connection,
152     char delim,
153     const std::string& na,
154     bool col_names,
155     bool bom,
156     int quote_escape,
157     const char* eol) {
158   stream_delim(
159       connection,
160       df,
161       delim,
162       na,
163       col_names,
164       bom,
165       static_cast<quote_escape_t>(quote_escape),
166       eol);
167 }
168 
169 // =============================================================================
170 // Derived from EncodeElementS in RPostgreSQL
171 // Written by: tomoakin@kenroku.kanazawa-u.ac.jp
172 // License: GPL-2
173 
stream_delim(const cpp11::sexp & connection,const cpp11::sexp & x,int i,char delim,const std::string & na,quote_escape_t escape)174 void stream_delim(
175     const cpp11::sexp& connection,
176     const cpp11::sexp& x,
177     int i,
178     char delim,
179     const std::string& na,
180     quote_escape_t escape) {
181   switch (TYPEOF(x)) {
182   case LGLSXP: {
183     int value = LOGICAL(x)[i];
184     if (value == TRUE) {
185       write_bytes(connection, "TRUE", 4);
186     } else if (value == FALSE) {
187       write_bytes(connection, "FALSE", 5);
188     } else {
189       write_bytes(connection, na.c_str(), na.size());
190     }
191     break;
192   }
193   case INTSXP: {
194     int value = INTEGER(x)[i];
195     if (value == NA_INTEGER) {
196       write_bytes(connection, na.c_str(), na.size());
197     } else {
198       std::array<char, 32> str;
199       int len = snprintf(str.data(), 32, "%i", value);
200       if (len > 32) {
201         cpp11::stop("integer too big");
202       }
203       write_bytes(connection, str.data(), len);
204     }
205     break;
206   }
207   case REALSXP: {
208     double value = REAL(x)[i];
209     if (!R_FINITE(value)) {
210       if (ISNA(value) || ISNAN(value)) {
211         write_bytes(connection, na.c_str(), na.size());
212       } else if (value > 0) {
213         write_bytes(connection, "Inf", 3);
214       } else {
215         write_bytes(connection, "-Inf", 4);
216       }
217     } else {
218       std::array<char, 32> str;
219       int len = dtoa_grisu3(value, str.data());
220       write_bytes(connection, str.data(), len);
221     }
222     break;
223   }
224   case STRSXP: {
225     if (STRING_ELT(x, i) == NA_STRING) {
226       write_bytes(connection, na.c_str(), na.size());
227     } else {
228       stream_delim(
229           connection,
230           Rf_translateCharUTF8(STRING_ELT(x, i)),
231           delim,
232           na,
233           escape);
234     }
235     break;
236   }
237   default:
238     cpp11::stop(
239         "Don't know how to handle vector of type %s.", Rf_type2char(TYPEOF(x)));
240   }
241 }
242