1 #include <R.h>
2 #include <Rdefines.h>
3
4 //#define __DEBUG
5
6 // Bit operations on compound variables.
_ior(int * x,int * y,int * r,int l)7 static void _ior(int *x, int *y, int *r, int l)
8 {
9 while (l-- > 0)
10 r[l] = x[l] | y[l];
11 }
12
_xor(int * x,int * y,int * r,int l)13 static void _xor(int *x, int *y, int *r, int l)
14 {
15 while (l-- > 0)
16 r[l] = x[l] ^ y[l];
17 }
18
_and(int * x,int * y,int * r,int l)19 static void _and(int *x, int *y, int *r, int l)
20 {
21 while (l-- > 0)
22 r[l] = x[l] & y[l];
23 }
24
25 typedef void (*OPFUN)(int *, int *, int *, int);
26 static OPFUN ops[] = { _ior, _and, _xor };
27
28 // Compare compound variables.
_ieq(int * x,int * y,int l)29 static int _ieq(int *x, int *y, int l) {
30 while (l-- > 0)
31 if (x[l] != y[l])
32 return 0;
33 return 1;
34 }
35
36 // Hash function for compound variables.
_hash(int * x,int l,int k)37 static int _hash(int *x, int l, int k) {
38 unsigned int j = l * 100;
39
40 k = 32 - k;
41 while (l-- > 0) {
42 j ^= 3141592653U * (unsigned int) x[l] >> k;
43 j *= 97;
44 }
45 return 3141592653U * j >> k;
46 }
47
48 // Add index to hash.
_hadd(SEXP x,int i,SEXP h,int k)49 static int _hadd(SEXP x, int i, SEXP h, int k) {
50 int j;
51 SEXP s;
52
53 s = VECTOR_ELT(x, i);
54 k = _hash(INTEGER(s), LENGTH(s), k);
55 while ((j = INTEGER(h)[k]) > -1) {
56 if (_ieq(INTEGER(VECTOR_ELT(x, j)), INTEGER(s), LENGTH(s)))
57 return j;
58 k = (k + 1) % LENGTH(h);
59 }
60 INTEGER(h)[k] = i;
61
62 return -1;
63 }
64
65 // Compute the closure of a set of binary vectors
66 // under the basic binary operations (see above).
67 //
68 // NOTE xor should not be used for obvious reasons.
69 //
70 // Inputs are a logical matrix with the elements in
71 // the rows and the atoms in the columns, and second
72 // an integer vector specifying the operator to use.
73 //
74 // The worst time complexity is O(min(2^nc, nr^2)).
75 // Note that normally the runtime is order dependent,
76 // e.g. it takes less iterations if the atoms are
77 // in increasing order with respect to size (number
78 // of bits).
79 //
80 // Returns a logical matrix. The rows are in depth-
81 // first-search order.
82 //
83 // Version: 0.2
84 //
85 // (C) ceeboo 2008
86
sets_closure(SEXP x,SEXP R_op)87 SEXP sets_closure(SEXP x, SEXP R_op) {
88 if (!x || !isMatrix(x) || TYPEOF(x) != LGLSXP)
89 error("'x' not a logical matrix");
90 if (!R_op || TYPEOF(R_op) != INTSXP)
91 error("'op' not an integer vector");
92 int i, j, k, l, n, nr, nc, nb, hk;
93 int *p;
94 OPFUN op;
95 SEXP r, q, q0, s, t, t0, ht;
96
97 nr = INTEGER(GET_DIM(x))[0];
98 nc = INTEGER(GET_DIM(x))[1];
99 if (!nc && nr)
100 error("'x' invalid dimensions");
101 if (nr < 2)
102 return x;
103
104 // FIXME does using all the bits also work
105 // on other platforms?
106 nb = (int) ceil((double) nc / (sizeof(int) * CHAR_BIT));
107
108 if (INTEGER(R_op)[0] < 1 ||
109 INTEGER(R_op)[0] > sizeof(ops) / sizeof(OPFUN))
110 error("'op' invalid value");
111
112 op = ops[INTEGER(R_op)[0]-1];
113
114 t = PROTECT(allocVector(VECSXP, nr));
115
116 // Encode data. Note that the bits are
117 // spread evenly across the variables.
118 for (i = 0; i < nr; i++) {
119 SET_VECTOR_ELT(t, i, (s = allocVector(INTSXP, nb)));
120 memset(INTEGER(s), 0, sizeof(int) * nb);
121 for (k = 0; k < nc; k++) {
122 j = k % nb;
123 INTEGER(s)[j] <<= 1;
124 INTEGER(s)[j] |= LOGICAL(x)[i+k*nr];
125 }
126 }
127 // Initialize hash table.
128 if (nr > 1073741824)
129 error("size %d too large for hashing", nr);
130 k = 2 * nr;
131 n = 2;
132 hk = 1;
133 while (k > n) {
134 n *= 2;
135 hk += 1;
136 }
137 ht = PROTECT(allocVector(INTSXP, n));
138 for (k = 0; k < n; k++)
139 INTEGER(ht)[k] = -1;
140
141 // Remove duplicates.
142 n = 0;
143 for (k = 0; k < nr; k++) {
144 if (_hadd(t, k, ht, hk) > -1)
145 continue;
146 if (n < k)
147 SET_VECTOR_ELT(t, n, VECTOR_ELT(t, k));
148 n++;
149 }
150 nr = n;
151
152 // Reset hash table.
153 for (k = 0; k < LENGTH(ht); k++)
154 INTEGER(ht)[k] = -1;
155
156 // Result vector.
157 n = 0;
158 r = PROTECT(allocVector(VECSXP, nr));
159 if (op == _xor) {
160 SET_VECTOR_ELT(r, n, (s = allocVector(INTSXP, nb)));
161 memset(INTEGER(s), 0, sizeof(int) * nb);
162 _hadd(r, n++, ht, hk);
163 }
164
165 // Initialize stack.
166 q = PROTECT(allocVector(VECSXP, nr + 1));
167 for (k = 2; k < nr + 1; k++)
168 SET_VECTOR_ELT(q, k, allocVector(INTSXP, nb));
169
170 j = 1;
171 p = INTEGER(PROTECT(allocVector(INTSXP, nr + 1)));
172 p[1] = 0;
173
174 #ifdef __DEBUG
175 i = 0;
176 Rprintf("# %7s %7s %7s %7s\n", "iter", "size", "RLEN", "HLEN");
177 #endif
178 while (j > 0) {
179 t0 = VECTOR_ELT(t, p[j]);
180 if (j > 1)
181 op(INTEGER(t0), INTEGER(VECTOR_ELT(q, j-1)),
182 INTEGER((q0 = VECTOR_ELT(q, j))), nb);
183 else
184 SET_VECTOR_ELT(q, j, (q0 = t0));
185
186 // Resize hash table.
187 if ((k = 2 * n) == LENGTH(ht)) {
188 if (n > 1073741824)
189 error("size %d too large for hashing");
190 UNPROTECT_PTR(ht);
191
192 ht = PROTECT(allocVector(INTSXP, k * 2));
193 for (k = 0; k < LENGTH(ht); k++)
194 INTEGER(ht)[k] = -1;
195 hk++;
196 for (k = 0; k < n; k++)
197 _hadd(r, k, ht, hk);
198 }
199 // Resize result vector.
200 if (n == LENGTH(r)) {
201 s = r;
202 r = PROTECT(allocVector(VECSXP, 2 * n));
203 for (k = 0; k < n; k++)
204 SET_VECTOR_ELT(r, k, VECTOR_ELT(s, k));
205
206 UNPROTECT_PTR(s);
207 }
208 SET_VECTOR_ELT(r, n, q0);
209
210 l = n;
211 if (_hadd(r, n, ht, hk) == -1)
212 SET_VECTOR_ELT(r, n++, duplicate(q0));
213
214 if (p[j] < nr - 1) {
215 if (n > l) { // expand sequence
216 j++;
217 p[j] = p[j-1] + 1;
218 } else // prune
219 p[j]++;
220 } else // backtrack
221 p[--j]++;
222 #ifdef __DEBUG
223 i++;
224 if (j < 2)
225 Rprintf("# %7i %7i %7i %7i\n", i, n, LENGTH(r), LENGTH(ht));
226 #endif
227 R_CheckUserInterrupt();
228 }
229 UNPROTECT(5);
230
231 // Decode result.
232 PROTECT(r);
233 s = allocMatrix(LGLSXP, n, nc);
234 for (i = 0; i < n; i++) {
235 q = VECTOR_ELT(r, i);
236 for (k = nc - 1; k > -1; k--) {
237 j = k % nb;
238 LOGICAL(s)[i+k*n] = INTEGER(q)[j] & 1;
239 INTEGER(q)[j] >>= 1;
240 }
241 }
242 UNPROTECT(1);
243
244 // Set colnames.
245 if (!isNull((q = getAttrib(x, R_DimNamesSymbol)))) {
246 PROTECT(s);
247 setAttrib(s, R_DimNamesSymbol, (r = allocVector(VECSXP, 2)));
248 SET_VECTOR_ELT(r, 0, R_NilValue);
249 SET_VECTOR_ELT(r, 1, VECTOR_ELT(q, 1));
250
251 UNPROTECT(1);
252 }
253
254 return s;
255 }
256
257 // From the R-2.7.1 source code.
258 #define NUMERIC int
R_qsort_int_V(int * v,SEXP I,int i,int j)259 void R_qsort_int_V(int *v, SEXP I, int i, int j)
260 #include "qsort-Vbody.h"
261 #undef NUMERIC
262
263 // Complement a compound variable.
264 static void _not(int *x, int *r, int l)
265 {
266 while (l-- > 0)
267 r[l] = ~x[l];
268 }
269
270 // Test if the first compound variable
271 // is a subset of the second.
_iss(int * x,int * y,int l)272 static int _iss(int *x, int *y, int l)
273 {
274 while (l-- > 0)
275 if ((x[l] & y[l]) != x[l])
276 return 0;
277 return 1;
278 }
279
280 // Compute the reduction (base) of a logical matrix
281 // under union or intersection, i.e. a unique minimal
282 // subset of the rows that spans the same space.
283 //
284 // cf. Doignon and Falmagne (1999). Knowledge Spaces.
285 // Springer, pp. 29 -- 31.
286 //
287 // Inputs are as above.
288 //
289 // The worst time complexity is O(n^2). Note that
290 // r(x, &) == !r(!x, |), and vice versa.
291 //
292 // Returns a logical matrix.
293 //
294 // Version 0.1
295 //
296 // (C) ceeboo 2008
297
sets_reduction(SEXP x,SEXP R_op)298 SEXP sets_reduction(SEXP x, SEXP R_op) {
299 if (!x || !isMatrix(x) || TYPEOF(x) != LGLSXP)
300 error("'x' not a logical matrix");
301 if (!R_op || TYPEOF(R_op) != INTSXP)
302 error("'op' not an integer vector");
303 int i, j, k, l, n, nr, nc, nb;
304 SEXP r, s, q, q0, t;
305
306 nr = INTEGER(GET_DIM(x))[0];
307 nc = INTEGER(GET_DIM(x))[1];
308 if (!nc && nr)
309 error("'x' invalid dimensions");
310 if (nr < 2)
311 return x;
312
313 // FIXME does using all the bits also work
314 // on other platforms?
315 nb = (int) ceil((double) nc / (sizeof(int) * CHAR_BIT));
316
317 if (INTEGER(R_op)[0] != 1 &&
318 INTEGER(R_op)[0] != 2)
319 error("'op' invalid value");
320
321 t = PROTECT(allocVector(VECSXP, nr));
322 q = PROTECT(allocVector(INTSXP, nr));
323
324 // Encode data.
325 for (i = 0; i < nr; i++) {
326 SET_VECTOR_ELT(t, i, (s = allocVector(INTSXP, nb)));
327 memset(INTEGER(s), 0, sizeof(int) * nb);
328 n = 0;
329 for (k = 0; k < nc; k++) {
330 j = k % nb;
331 l = LOGICAL(x)[i+k*nr];
332 INTEGER(s)[j] <<= 1;
333 INTEGER(s)[j] |= l;
334 n += l;
335 }
336 // Transform to dual.
337 if (INTEGER(R_op)[0] == 2) {
338 _not(INTEGER(s), INTEGER(s), nb);
339 INTEGER(q)[i] = nc - n;
340 } else
341 INTEGER(q)[i] = n;
342 }
343
344 // Order breadth-first.
345 R_qsort_int_V(INTEGER(q), t, 1, nr);
346 UNPROTECT_PTR(q);
347
348 // Remove duplicates.
349 q = duplicated(t, FALSE);
350 n = 0;
351 for (i = 0; i < nr; i++) {
352 if (LOGICAL(q)[i] == TRUE)
353 continue;
354 if (n < i)
355 SET_VECTOR_ELT(t, n, VECTOR_ELT(t, i));
356 n++;
357 }
358 nr = n;
359
360 // Initialize.
361 q = PROTECT(allocVector(INTSXP, nb));
362 r = PROTECT(allocVector(VECSXP, nr));
363 SET_VECTOR_ELT(r, 0, VECTOR_ELT(t, 0));
364 n = 1;
365 #ifdef __DEBUG
366 k = 0;
367 Rprintf("# %7s %7s\n", "iter", "size");
368 #endif
369 for (i = 1; i < nr; i++) {
370 memset(INTEGER(q), 0, sizeof(int) * nb);
371 s = VECTOR_ELT(t, i);
372 for (j = i - 1; j > -1; j--) {
373 q0 = VECTOR_ELT(t, j);
374 if (!_iss(INTEGER(q0), INTEGER(s), nb))
375 continue;
376 _ior(INTEGER(q0), INTEGER(q), INTEGER(q), nb);
377 if (_ieq(INTEGER(s), INTEGER(q), nb))
378 goto next;
379 }
380 SET_VECTOR_ELT(r, n++, s);
381 next:
382 #ifdef __DEBUG
383 k += i - 1 - j;
384 Rprintf("# %7i %7i\n", k, n);
385 #endif
386 R_CheckUserInterrupt();
387 }
388 UNPROTECT_PTR(q);
389 UNPROTECT_PTR(t);
390
391 // Decode result.
392 s = allocMatrix(LGLSXP, n, nc);
393 for (i = 0; i < n; i++) {
394 q = VECTOR_ELT(r, i);
395 // Transform back.
396 if (INTEGER(R_op)[0] == 2)
397 _not(INTEGER(q), INTEGER(q), nb);
398 for (k = nc - 1; k > -1; k--) {
399 j = k % nb;
400 LOGICAL(s)[i+k*n] = INTEGER(q)[j] & 1;
401 INTEGER(q)[j] >>= 1;
402 }
403 }
404 UNPROTECT(1);
405
406 // Set colnames.
407 if (!isNull((q = getAttrib(x, R_DimNamesSymbol)))) {
408 PROTECT(s);
409 setAttrib(s, R_DimNamesSymbol, (r = allocVector(VECSXP, 2)));
410 // FIXME do we need rownames?
411 SET_VECTOR_ELT(r, 0, R_NilValue);
412 SET_VECTOR_ELT(r, 1, VECTOR_ELT(q, 1));
413
414 UNPROTECT(1);
415 }
416
417 return s;
418 }
419
420 //
421