1 #include "data.table.h"
2 
cj(SEXP base_list)3 SEXP cj(SEXP base_list) {
4   int ncol = LENGTH(base_list);
5   SEXP out = PROTECT(allocVector(VECSXP, ncol));
6   int nrow = 1;
7   // already confirmed to be less than .Machine$integer.max at R level
8   for (int j=0; j<ncol; ++j) nrow *= length(VECTOR_ELT(base_list, j));
9   int eachrep = 1;
10   for (int j=ncol-1; j>=0; --j) {
11     SEXP source = VECTOR_ELT(base_list, j), target;
12     SET_VECTOR_ELT(out, j, target=allocVector(TYPEOF(source), nrow));
13     copyMostAttrib(source, target);  // includes levels of factors, integer64, custom classes, etc
14     if (nrow==0) continue;  // one or more columns are empty so the result will be empty, #2511
15     int thislen = LENGTH(source);
16     int blocklen = thislen*eachrep;
17     int ncopy = nrow/blocklen;
18     switch(TYPEOF(source)) {
19     case LGLSXP:
20     case INTSXP: {
21       const int *restrict sourceP = INTEGER(source);
22       int *restrict targetP = INTEGER(target);
23       #pragma omp parallel for num_threads(getDTthreads(thislen*eachrep, true))
24       // default static schedule so two threads won't write to same cache line in last column
25       // if they did write to same cache line (and will when last column's thislen is small) there's no correctness issue
26       for (int i=0; i<thislen; ++i) {
27         const int item = sourceP[i];
28         const int end = (i+1)*eachrep;
29         for (int j=i*eachrep; j<end; ++j) targetP[j] = item;  // no div, mod or read ops inside loop; just rep a const contiguous write
30       }
31       #pragma omp parallel for num_threads(getDTthreads(ncopy*blocklen, true))
32       for (int i=1; i<ncopy; ++i) {
33         memcpy(targetP + i*blocklen, targetP, blocklen*sizeof(int));
34       }
35     } break;
36     case REALSXP: {
37       const double *restrict sourceP = REAL(source);
38       double *restrict targetP = REAL(target);
39       #pragma omp parallel for num_threads(getDTthreads(thislen*eachrep, true))
40       for (int i=0; i<thislen; ++i) {
41         const double item = sourceP[i];
42         const int end=(i+1)*eachrep;
43         for (int j=i*eachrep; j<end; ++j) targetP[j] = item;
44       }
45       #pragma omp parallel for num_threads(getDTthreads(ncopy*blocklen, true))
46       for (int i=1; i<ncopy; ++i) {
47         memcpy(targetP + i*blocklen, targetP, blocklen*sizeof(double));
48       }
49     } break;
50     case CPLXSXP: {
51       const Rcomplex *restrict sourceP = COMPLEX(source);
52       Rcomplex *restrict targetP = COMPLEX(target);
53       #pragma omp parallel for num_threads(getDTthreads(thislen*eachrep, true))
54       for (int i=0; i<thislen; ++i) {
55         const Rcomplex item = sourceP[i];
56         const int end=(i+1)*eachrep;
57         for (int j=i*eachrep; j<end; ++j) targetP[j] = item;
58       }
59       #pragma omp parallel for num_threads(getDTthreads(ncopy*blocklen, true))
60       for (int i=1; i<ncopy; ++i) {
61         memcpy(targetP + i*blocklen, targetP, blocklen*sizeof(Rcomplex));
62       }
63     } break;
64     case STRSXP: {
65       const SEXP *sourceP = STRING_PTR(source);
66       int start = 0;
67       for (int i=0; i<ncopy; ++i) {
68         for (int j=0; j<thislen; ++j) {
69           const SEXP item = sourceP[j];
70           const int end = start+eachrep;
71           for (int k=start; k<end; ++k) SET_STRING_ELT(target, k, item);  // no div, mod, or read-API call to STRING_ELT
72           start = end;
73         }
74       }
75     } break;
76     case VECSXP: {
77       const SEXP *sourceP = SEXPPTR_RO(source);
78       int start = 0;
79       for (int i=0; i<ncopy; ++i) {
80         for (int j=0; j<thislen; ++j) {
81           const SEXP item = sourceP[j];
82           const int end = start+eachrep;
83           for (int k=start; k<end; ++k) SET_VECTOR_ELT(target, k, item);
84           start = end;
85         }
86       }
87     } break;
88     default:
89       error(_("Type '%s' not supported by CJ."), type2char(TYPEOF(source)));
90     }
91     eachrep *= thislen;
92   }
93   UNPROTECT(1);
94   return out;
95 }
96 
97