1 #include "data.table.h"
2
cj(SEXP base_list)3 SEXP cj(SEXP base_list) {
4 int ncol = LENGTH(base_list);
5 SEXP out = PROTECT(allocVector(VECSXP, ncol));
6 int nrow = 1;
7 // already confirmed to be less than .Machine$integer.max at R level
8 for (int j=0; j<ncol; ++j) nrow *= length(VECTOR_ELT(base_list, j));
9 int eachrep = 1;
10 for (int j=ncol-1; j>=0; --j) {
11 SEXP source = VECTOR_ELT(base_list, j), target;
12 SET_VECTOR_ELT(out, j, target=allocVector(TYPEOF(source), nrow));
13 copyMostAttrib(source, target); // includes levels of factors, integer64, custom classes, etc
14 if (nrow==0) continue; // one or more columns are empty so the result will be empty, #2511
15 int thislen = LENGTH(source);
16 int blocklen = thislen*eachrep;
17 int ncopy = nrow/blocklen;
18 switch(TYPEOF(source)) {
19 case LGLSXP:
20 case INTSXP: {
21 const int *restrict sourceP = INTEGER(source);
22 int *restrict targetP = INTEGER(target);
23 #pragma omp parallel for num_threads(getDTthreads(thislen*eachrep, true))
24 // default static schedule so two threads won't write to same cache line in last column
25 // if they did write to same cache line (and will when last column's thislen is small) there's no correctness issue
26 for (int i=0; i<thislen; ++i) {
27 const int item = sourceP[i];
28 const int end = (i+1)*eachrep;
29 for (int j=i*eachrep; j<end; ++j) targetP[j] = item; // no div, mod or read ops inside loop; just rep a const contiguous write
30 }
31 #pragma omp parallel for num_threads(getDTthreads(ncopy*blocklen, true))
32 for (int i=1; i<ncopy; ++i) {
33 memcpy(targetP + i*blocklen, targetP, blocklen*sizeof(int));
34 }
35 } break;
36 case REALSXP: {
37 const double *restrict sourceP = REAL(source);
38 double *restrict targetP = REAL(target);
39 #pragma omp parallel for num_threads(getDTthreads(thislen*eachrep, true))
40 for (int i=0; i<thislen; ++i) {
41 const double item = sourceP[i];
42 const int end=(i+1)*eachrep;
43 for (int j=i*eachrep; j<end; ++j) targetP[j] = item;
44 }
45 #pragma omp parallel for num_threads(getDTthreads(ncopy*blocklen, true))
46 for (int i=1; i<ncopy; ++i) {
47 memcpy(targetP + i*blocklen, targetP, blocklen*sizeof(double));
48 }
49 } break;
50 case CPLXSXP: {
51 const Rcomplex *restrict sourceP = COMPLEX(source);
52 Rcomplex *restrict targetP = COMPLEX(target);
53 #pragma omp parallel for num_threads(getDTthreads(thislen*eachrep, true))
54 for (int i=0; i<thislen; ++i) {
55 const Rcomplex item = sourceP[i];
56 const int end=(i+1)*eachrep;
57 for (int j=i*eachrep; j<end; ++j) targetP[j] = item;
58 }
59 #pragma omp parallel for num_threads(getDTthreads(ncopy*blocklen, true))
60 for (int i=1; i<ncopy; ++i) {
61 memcpy(targetP + i*blocklen, targetP, blocklen*sizeof(Rcomplex));
62 }
63 } break;
64 case STRSXP: {
65 const SEXP *sourceP = STRING_PTR(source);
66 int start = 0;
67 for (int i=0; i<ncopy; ++i) {
68 for (int j=0; j<thislen; ++j) {
69 const SEXP item = sourceP[j];
70 const int end = start+eachrep;
71 for (int k=start; k<end; ++k) SET_STRING_ELT(target, k, item); // no div, mod, or read-API call to STRING_ELT
72 start = end;
73 }
74 }
75 } break;
76 case VECSXP: {
77 const SEXP *sourceP = SEXPPTR_RO(source);
78 int start = 0;
79 for (int i=0; i<ncopy; ++i) {
80 for (int j=0; j<thislen; ++j) {
81 const SEXP item = sourceP[j];
82 const int end = start+eachrep;
83 for (int k=start; k<end; ++k) SET_VECTOR_ELT(target, k, item);
84 start = end;
85 }
86 }
87 } break;
88 default:
89 error(_("Type '%s' not supported by CJ."), type2char(TYPEOF(source)));
90 }
91 eachrep *= thislen;
92 }
93 UNPROTECT(1);
94 return out;
95 }
96
97