1 #include <cstring>
2
3 #include "cpp11/R.hpp"
4
5 #include "cpp11/protect.hpp"
6 #include "cpp11/sexp.hpp"
7 #include "cpp11/integers.hpp"
8 #include "cpp11/strings.hpp"
9 #include "cpp11/list.hpp"
10 #include "cpp11/data_frame.hpp"
11
12 #if R_VERSION < R_Version(3, 5, 0)
13
DATAPTR(SEXP x)14 void* DATAPTR(SEXP x) {
15 switch(TYPEOF(x)) {
16 case STRSXP:
17 return (char*) CHAR(x);
18 break;
19 case LGLSXP:
20 return (char*) LOGICAL(x);
21 case INTSXP:
22 return (char*) INTEGER(x);
23 case RAWSXP:
24 return (char*) RAW(x);
25 case CPLXSXP:
26 return (char*) COMPLEX(x);
27 case REALSXP:
28 return (char*) REAL(x);
29 default:
30 cpp11::stop("Invalid type %s", Rf_type2char(TYPEOF(x)));
31 }
32 return nullptr;
33 }
34
35 #endif
36
37 // A debug macro -- change to 'debug(x) x' for debug output
38 #define debug(x)
39
40 // An optimized rep
41 #define DO_REP(RTYPE, CTYPE, ACCESSOR) \
42 { \
43 for (int i = 0; i < n; ++i) { \
44 memcpy((char*)ACCESSOR(output) + i * xn * sizeof(CTYPE), \
45 (char*)ACCESSOR(x), \
46 sizeof(CTYPE) * xn); \
47 } \
48 }
49
rep_(SEXP x,int n,std::string var_name)50 SEXP rep_(SEXP x, int n, std::string var_name) {
51 if (!Rf_isVectorAtomic(x) && TYPEOF(x) != VECSXP) {
52 cpp11::stop("All columns must be atomic vectors or lists. Problem with '%s'", var_name.c_str());
53 }
54
55 if (Rf_inherits(x, "POSIXlt")) {
56 cpp11::stop("'%s' is a POSIXlt. Please convert to POSIXct.", var_name.c_str());
57 }
58
59 int xn = Rf_length(x);
60 int nout = xn * n;
61
62 cpp11::sexp output(Rf_allocVector(TYPEOF(x), nout));
63 switch (TYPEOF(x)) {
64 case INTSXP:
65 DO_REP(INTSXP, int, INTEGER);
66 break;
67 case REALSXP:
68 DO_REP(REALSXP, double, REAL);
69 break;
70 case LGLSXP:
71 DO_REP(LGLSXP, int, LOGICAL);
72 break;
73 case CPLXSXP:
74 DO_REP(CPLXSXP, Rcomplex, COMPLEX);
75 break;
76 case RAWSXP:
77 DO_REP(RAWSXP, Rbyte, RAW);
78 break;
79 case STRSXP: {
80 int counter = 0;
81 for (int i = 0; i < n; ++i) {
82 for (int j = 0; j < xn; ++j) {
83 SET_STRING_ELT(output, counter, STRING_ELT(x, j));
84 ++counter;
85 }
86 }
87 break;
88 }
89 case VECSXP: {
90 int counter = 0;
91 for (int i = 0; i < n; ++i) {
92 for (int j = 0; j < xn; ++j) {
93 SET_VECTOR_ELT(output, counter, VECTOR_ELT(x, j));
94 ++counter;
95 }
96 }
97 break;
98 }
99 default: {
100 cpp11::stop("Unhandled RTYPE in '%s'", var_name.c_str());
101 return R_NilValue;
102 }
103 }
104
105 Rf_copyMostAttrib(x, output);
106 return output;
107 }
108
109 // Optimized factor routine for the case where we want to make
110 // a factor from a vector of names -- used for generating the
111 // 'variable' column in the melted data.frame
make_variable_column_factor(cpp11::strings x,int nrow)112 cpp11::integers make_variable_column_factor(cpp11::strings x, int nrow) {
113 cpp11::writable::integers output(x.size() * nrow);
114
115 int idx = 0;
116 for (int i = 0; i < x.size(); ++i)
117 for (int j = 0; j < nrow; ++j)
118 output[idx++] = i + 1;
119
120 output.attr("levels") = x;
121 output.attr("class") = "factor";
122 return output;
123 }
124
make_variable_column_character(cpp11::strings x,int nrow)125 cpp11::strings make_variable_column_character(cpp11::strings x, int nrow) {
126 cpp11::writable::strings output(x.size() * nrow);
127
128 int idx = 0;
129 for (int i = 0; i < x.size(); ++i)
130 for (int j = 0; j < nrow; ++j)
131 output[idx++] = x[i];
132
133 return output;
134 }
135
136 // Concatenate vectors for the 'value' column
137 #define DO_CONCATENATE(CTYPE) \
138 { \
139 memcpy((char*)DATAPTR(output) + i* nrow * sizeof(CTYPE), \
140 (char*)DATAPTR(tmp), \
141 nrow * sizeof(CTYPE)); \
142 break; \
143 }
144
concatenate(const cpp11::data_frame & x,cpp11::integers ind,bool factorsAsStrings)145 SEXP concatenate(const cpp11::data_frame& x, cpp11::integers ind, bool factorsAsStrings) {
146
147 int nrow = x.nrow();
148 int n_ind = ind.size();
149
150 // We coerce up to the 'max type' if necessary, using the fact
151 // that R's SEXPTYPEs are also ordered in terms of 'precision'
152 // Note: we convert factors to characters if necessary
153 int max_type = 0;
154 int ctype = 0;
155 for (int i = 0; i < n_ind; ++i) {
156
157 if (Rf_isFactor(x[ind[i]]) and factorsAsStrings) {
158 ctype = STRSXP;
159 } else {
160 ctype = TYPEOF(x[ind[i]]);
161 }
162 max_type = ctype > max_type ? ctype : max_type;
163 }
164
165 debug(printf("Max type of value variables is %s\n", Rf_type2char(max_type)));
166
167 cpp11::sexp tmp;
168 cpp11::sexp output(Rf_allocVector(max_type, nrow * n_ind));
169 for (int i = 0; i < n_ind; ++i) {
170 SEXP col = x[ind[i]];
171
172 if (Rf_inherits(col, "POSIXlt")) {
173 cpp11::stop("Column %i is a POSIXlt. Please convert to POSIXct.", i + 1);
174 }
175
176 // a 'tmp' pointer to the current column being iterated over, or
177 // a coerced version if necessary
178 if (TYPEOF(col) == max_type) {
179 tmp = col;
180 } else if (Rf_isFactor(col) and factorsAsStrings) {
181 tmp = Rf_asCharacterFactor(col);
182 } else {
183 tmp = Rf_coerceVector(col, max_type);
184 }
185
186 switch (max_type) {
187 case INTSXP:
188 DO_CONCATENATE(int);
189 case REALSXP:
190 DO_CONCATENATE(double);
191 case LGLSXP:
192 DO_CONCATENATE(int);
193 case CPLXSXP:
194 DO_CONCATENATE(Rcomplex);
195 case STRSXP: {
196 for (int j = 0; j < nrow; ++j) {
197 SET_STRING_ELT(output, i * nrow + j, STRING_ELT(tmp, j));
198 }
199 break;
200 }
201 case VECSXP: {
202 for (int j = 0; j < nrow; ++j) {
203 SET_VECTOR_ELT(output, i * nrow + j, VECTOR_ELT(tmp, j));
204 }
205 break;
206 }
207 default:
208 cpp11::stop("All columns be atomic vectors or lists (not %s)", Rf_type2char(max_type));
209 }
210 }
211
212 return output;
213 }
214
215 [[cpp11::register]]
melt_dataframe(cpp11::data_frame data,const cpp11::integers & id_ind,const cpp11::integers & measure_ind,cpp11::strings variable_name,cpp11::strings value_name,cpp11::sexp attrTemplate,bool factorsAsStrings,bool valueAsFactor,bool variableAsFactor)216 cpp11::list melt_dataframe(cpp11::data_frame data,
217 const cpp11::integers& id_ind,
218 const cpp11::integers& measure_ind,
219 cpp11::strings variable_name,
220 cpp11::strings value_name,
221 cpp11::sexp attrTemplate,
222 bool factorsAsStrings,
223 bool valueAsFactor,
224 bool variableAsFactor) {
225
226 int nrow = data.nrow();
227
228 cpp11::strings data_names(data.attr("names"));
229
230 int n_id = id_ind.size();
231 debug(Rprintf("n_id == %i\n", n_id));
232
233 int n_measure = measure_ind.size();
234 debug(Rprintf("n_measure == %i\n", n_measure));
235
236 // Don't melt if the value variables are non-atomic
237 for (int i = 0; i < n_measure; ++i) {
238 if (!Rf_isVector(data[measure_ind[i]]) || Rf_inherits(data[measure_ind[i]], "data.frame")) {
239 cpp11::stop("All columns must be atomic vectors or lists. Problem with column %i.", measure_ind[i] + 1);
240 }
241 }
242
243 // The output should be a data.frame with:
244 // number of columns == number of id vars + 'variable' + 'value',
245 // with number of rows == data.nrow() * number of value vars
246 cpp11::writable::list output(n_id + 2);
247
248 // First, allocate the ID variables
249 // we repeat each ID vector n_measure times
250 for (int i = 0; i < n_id; ++i) {
251 SEXP object = data[id_ind[i]];
252 std::string var_name = std::string(data_names[id_ind[i]]);
253 output[i] = rep_(object, n_measure, var_name);
254 }
255
256 // Now, we assign the 'variable' and 'value' columns
257
258 // 'variable' is made up of repeating the names of the 'measure' variables,
259 // each nrow times. We want this to be a factor as well.
260 cpp11::writable::strings id_names(n_measure);
261 for (int i = 0; i < n_measure; ++i) {
262 id_names[i] = data_names[measure_ind[i]];
263 }
264 if (variableAsFactor) {
265 output[n_id] = make_variable_column_factor(id_names, nrow);
266 } else {
267 output[n_id] = make_variable_column_character(id_names, nrow);
268 }
269
270 // 'value' is made by concatenating each of the 'value' variables
271 output[n_id + 1] = concatenate(data, measure_ind, factorsAsStrings);
272 if (!Rf_isNull(attrTemplate)) {
273 Rf_copyMostAttrib(attrTemplate, output[n_id + 1]);
274 }
275
276 // Make the List more data.frame like
277
278 // Set the row names
279 output.attr("row.names") = {NA_INTEGER, -(nrow * n_measure)};
280
281 // Set the names
282 cpp11::writable::strings out_names(n_id + 2);
283 for (int i = 0; i < n_id; ++i) {
284 out_names[i] = data_names[id_ind[i]];
285 }
286 out_names[n_id] = variable_name[0];
287 out_names[n_id + 1] = value_name[0];
288 output.attr("names") = out_names;
289
290 // Set the class
291 output.attr("class") = "data.frame";
292
293 return output;
294 }
295