1 #include <R.h>
2 #include <Rdefines.h>
3 
4 //#define __DEBUG
5 
6 // Bit operations on compound variables.
_ior(int * x,int * y,int * r,int l)7 static void _ior(int *x, int *y, int *r, int l)
8 {
9     while (l-- > 0)
10 	r[l] = x[l] | y[l];
11 }
12 
_xor(int * x,int * y,int * r,int l)13 static void _xor(int *x, int *y, int *r, int l)
14 {
15     while (l-- > 0)
16 	r[l] = x[l] ^ y[l];
17 }
18 
_and(int * x,int * y,int * r,int l)19 static void _and(int *x, int *y, int *r, int l)
20 {
21     while (l-- > 0)
22 	r[l] = x[l] & y[l];
23 }
24 
25 typedef void (*OPFUN)(int *, int *, int *, int);
26 static OPFUN ops[] = { _ior, _and, _xor };
27 
28 // Compare compound variables.
_ieq(int * x,int * y,int l)29 static int _ieq(int *x, int *y, int l) {
30     while (l-- > 0)
31 	if (x[l] != y[l])
32 	    return 0;
33     return 1;
34 }
35 
36 // Hash function for compound variables.
_hash(int * x,int l,int k)37 static int _hash(int *x, int l, int k) {
38     unsigned int j = l * 100;
39 
40     k = 32 - k;
41     while (l-- > 0) {
42 	j ^= 3141592653U * (unsigned int) x[l] >> k;
43 	j *= 97;
44     }
45    return 3141592653U * j >> k;
46 }
47 
48 // Add index to hash.
_hadd(SEXP x,int i,SEXP h,int k)49 static int _hadd(SEXP x, int i, SEXP h, int k) {
50     int j;
51     SEXP s;
52 
53     s = VECTOR_ELT(x, i);
54     k = _hash(INTEGER(s), LENGTH(s), k);
55     while ((j = INTEGER(h)[k]) > -1) {
56 	if (_ieq(INTEGER(VECTOR_ELT(x, j)), INTEGER(s), LENGTH(s)))
57 	    return j;
58 	k = (k + 1) % LENGTH(h);
59     }
60     INTEGER(h)[k] = i;
61 
62     return -1;
63 }
64 
65 // Compute the closure of a set of binary vectors
66 // under the basic binary operations (see above).
67 //
68 // NOTE xor should not be used for obvious reasons.
69 //
70 // Inputs are a logical matrix with the elements in
71 // the rows and the atoms in the columns, and second
72 // an integer vector specifying the operator to use.
73 //
74 // The worst time complexity is O(min(2^nc, nr^2)).
75 // Note that normally the runtime is order dependent,
76 // e.g. it takes less iterations if the atoms are
77 // in increasing order with respect to size (number
78 // of bits).
79 //
80 // Returns a logical matrix. The rows are in depth-
81 // first-search order.
82 //
83 // Version: 0.2
84 //
85 // (C) ceeboo 2008
86 
sets_closure(SEXP x,SEXP R_op)87 SEXP sets_closure(SEXP x, SEXP R_op) {
88     if (!x || !isMatrix(x) || TYPEOF(x) != LGLSXP)
89 	error("'x' not a logical matrix");
90     if (!R_op || TYPEOF(R_op) != INTSXP)
91 	error("'op' not an integer vector");
92     int i, j, k, l, n, nr, nc, nb, hk;
93     int *p;
94     OPFUN op;
95     SEXP r, q, q0, s, t, t0, ht;
96 
97     nr = INTEGER(GET_DIM(x))[0];
98     nc = INTEGER(GET_DIM(x))[1];
99     if (!nc && nr)
100 	error("'x' invalid dimensions");
101     if (nr < 2)
102 	return x;
103 
104     // FIXME does using all the bits also work
105     //       on other platforms?
106     nb = (int) ceil((double) nc / (sizeof(int) * CHAR_BIT));
107 
108     if (INTEGER(R_op)[0] < 1 ||
109 	INTEGER(R_op)[0] > sizeof(ops) / sizeof(OPFUN))
110 	error("'op' invalid value");
111 
112     op = ops[INTEGER(R_op)[0]-1];
113 
114     t = PROTECT(allocVector(VECSXP, nr));
115 
116     // Encode data. Note that the bits are
117     // spread evenly across the variables.
118     for (i = 0; i < nr; i++) {
119 	SET_VECTOR_ELT(t, i, (s = allocVector(INTSXP, nb)));
120 	memset(INTEGER(s), 0, sizeof(int) * nb);
121 	for (k = 0; k < nc; k++) {
122 	    j = k % nb;
123 	    INTEGER(s)[j] <<= 1;
124 	    INTEGER(s)[j]  |= LOGICAL(x)[i+k*nr];
125 	}
126     }
127     // Initialize hash table.
128     if (nr > 1073741824)
129 	error("size %d too large for hashing", nr);
130     k  = 2 * nr;
131     n  = 2;
132     hk = 1;
133     while (k > n) {
134 	n  *= 2;
135 	hk += 1;
136     }
137     ht = PROTECT(allocVector(INTSXP, n));
138     for (k = 0; k < n; k++)
139 	INTEGER(ht)[k] = -1;
140 
141     // Remove duplicates.
142     n = 0;
143     for (k = 0; k < nr; k++) {
144 	if (_hadd(t, k, ht, hk) > -1)
145 	    continue;
146 	if (n < k)
147 	    SET_VECTOR_ELT(t, n, VECTOR_ELT(t, k));
148 	n++;
149     }
150     nr = n;
151 
152     // Reset hash table.
153     for (k = 0; k < LENGTH(ht); k++)
154 	INTEGER(ht)[k] = -1;
155 
156     // Result vector.
157     n = 0;
158     r = PROTECT(allocVector(VECSXP, nr));
159     if (op == _xor) {
160 	SET_VECTOR_ELT(r, n, (s = allocVector(INTSXP, nb)));
161 	memset(INTEGER(s), 0, sizeof(int) * nb);
162 	_hadd(r, n++, ht, hk);
163     }
164 
165     // Initialize stack.
166     q = PROTECT(allocVector(VECSXP, nr + 1));
167     for (k = 2; k < nr + 1; k++)
168 	SET_VECTOR_ELT(q, k, allocVector(INTSXP, nb));
169 
170     j = 1;
171     p = INTEGER(PROTECT(allocVector(INTSXP, nr + 1)));
172     p[1] = 0;
173 
174 #ifdef __DEBUG
175     i = 0;
176     Rprintf("# %7s %7s %7s %7s\n", "iter", "size", "RLEN", "HLEN");
177 #endif
178     while (j > 0) {
179 	t0 = VECTOR_ELT(t, p[j]);
180 	if (j > 1)
181 	    op(INTEGER(t0), INTEGER(VECTOR_ELT(q, j-1)),
182 		      INTEGER((q0 = VECTOR_ELT(q, j))), nb);
183 	else
184 	    SET_VECTOR_ELT(q, j, (q0 = t0));
185 
186 	// Resize hash table.
187 	if ((k = 2 * n) == LENGTH(ht)) {
188 	    if (n > 1073741824)
189 		error("size %d too large for hashing");
190 	    UNPROTECT_PTR(ht);
191 
192 	    ht = PROTECT(allocVector(INTSXP, k * 2));
193 	    for (k = 0; k < LENGTH(ht); k++)
194 		INTEGER(ht)[k] = -1;
195 	    hk++;
196 	    for (k = 0; k < n; k++)
197 		_hadd(r, k, ht, hk);
198 	}
199 	// Resize result vector.
200 	if (n == LENGTH(r)) {
201 	    s = r;
202 	    r = PROTECT(allocVector(VECSXP, 2 * n));
203 	    for (k = 0; k < n; k++)
204 		SET_VECTOR_ELT(r, k, VECTOR_ELT(s, k));
205 
206 	    UNPROTECT_PTR(s);
207 	}
208 	SET_VECTOR_ELT(r, n, q0);
209 
210 	l = n;
211 	if (_hadd(r, n, ht, hk) == -1)
212 	    SET_VECTOR_ELT(r, n++, duplicate(q0));
213 
214 	if (p[j] < nr - 1) {
215 	    if (n > l) {		// expand sequence
216 		j++;
217 		p[j] = p[j-1] + 1;
218 	    } else			// prune
219 		p[j]++;
220 	} else				// backtrack
221 	    p[--j]++;
222 #ifdef __DEBUG
223 	i++;
224 	if (j < 2)
225 	    Rprintf("# %7i %7i %7i %7i\n", i, n, LENGTH(r), LENGTH(ht));
226 #endif
227 	R_CheckUserInterrupt();
228     }
229     UNPROTECT(5);
230 
231     // Decode result.
232     PROTECT(r);
233     s = allocMatrix(LGLSXP, n, nc);
234     for (i = 0; i < n; i++) {
235 	q = VECTOR_ELT(r, i);
236 	for (k = nc - 1; k > -1; k--) {
237 	    j = k % nb;
238 	    LOGICAL(s)[i+k*n] = INTEGER(q)[j] & 1;
239 	    INTEGER(q)[j] >>= 1;
240 	}
241     }
242     UNPROTECT(1);
243 
244     // Set colnames.
245     if (!isNull((q = getAttrib(x, R_DimNamesSymbol)))) {
246 	PROTECT(s);
247 	setAttrib(s, R_DimNamesSymbol, (r = allocVector(VECSXP, 2)));
248 	SET_VECTOR_ELT(r, 0, R_NilValue);
249 	SET_VECTOR_ELT(r, 1, VECTOR_ELT(q, 1));
250 
251 	UNPROTECT(1);
252     }
253 
254     return s;
255 }
256 
257 // From the R-2.7.1 source code.
258 #define NUMERIC int
R_qsort_int_V(int * v,SEXP I,int i,int j)259 void R_qsort_int_V(int *v, SEXP I, int i, int j)
260 #include "qsort-Vbody.h"
261 #undef NUMERIC
262 
263 // Complement a compound variable.
264 static void _not(int *x, int *r, int l)
265 {
266     while (l-- > 0)
267 	r[l] = ~x[l];
268 }
269 
270 // Test if the first compound variable
271 // is a subset of the second.
_iss(int * x,int * y,int l)272 static int _iss(int *x, int *y, int l)
273 {
274     while (l-- > 0)
275 	if ((x[l] & y[l]) != x[l])
276 	    return 0;
277     return 1;
278 }
279 
280 // Compute the reduction (base) of a logical matrix
281 // under union or intersection, i.e. a unique minimal
282 // subset of the rows that spans the same space.
283 //
284 // cf. Doignon and Falmagne (1999). Knowledge Spaces.
285 //     Springer, pp. 29 -- 31.
286 //
287 // Inputs are as above.
288 //
289 // The worst time complexity is O(n^2). Note that
290 // r(x, &) == !r(!x, |), and vice versa.
291 //
292 // Returns a logical matrix.
293 //
294 // Version 0.1
295 //
296 // (C) ceeboo 2008
297 
sets_reduction(SEXP x,SEXP R_op)298 SEXP sets_reduction(SEXP x, SEXP R_op) {
299     if (!x || !isMatrix(x) || TYPEOF(x) != LGLSXP)
300 	error("'x' not a logical matrix");
301     if (!R_op || TYPEOF(R_op) != INTSXP)
302 	error("'op' not an integer vector");
303     int i, j, k, l, n, nr, nc, nb;
304     SEXP r, s, q, q0, t;
305 
306     nr = INTEGER(GET_DIM(x))[0];
307     nc = INTEGER(GET_DIM(x))[1];
308     if (!nc && nr)
309 	error("'x' invalid dimensions");
310     if (nr < 2)
311 	return x;
312 
313     // FIXME does using all the bits also work
314     //       on other platforms?
315     nb = (int) ceil((double) nc / (sizeof(int) * CHAR_BIT));
316 
317     if (INTEGER(R_op)[0] != 1 &&
318 	INTEGER(R_op)[0] != 2)
319 	error("'op' invalid value");
320 
321     t = PROTECT(allocVector(VECSXP, nr));
322     q = PROTECT(allocVector(INTSXP, nr));
323 
324     // Encode data.
325     for (i = 0; i < nr; i++) {
326 	SET_VECTOR_ELT(t, i, (s = allocVector(INTSXP, nb)));
327 	memset(INTEGER(s), 0, sizeof(int) * nb);
328 	n = 0;
329 	for (k = 0; k < nc; k++) {
330 	    j = k % nb;
331 	    l = LOGICAL(x)[i+k*nr];
332 	    INTEGER(s)[j] <<= 1;
333 	    INTEGER(s)[j]  |= l;
334 	    n += l;
335 	}
336 	// Transform to dual.
337 	if (INTEGER(R_op)[0] == 2) {
338 	    _not(INTEGER(s), INTEGER(s), nb);
339 	    INTEGER(q)[i] = nc - n;
340 	} else
341 	    INTEGER(q)[i] = n;
342     }
343 
344     // Order breadth-first.
345     R_qsort_int_V(INTEGER(q), t, 1, nr);
346     UNPROTECT_PTR(q);
347 
348     // Remove duplicates.
349     q = duplicated(t, FALSE);
350     n = 0;
351     for (i = 0; i < nr; i++) {
352 	if (LOGICAL(q)[i] == TRUE)
353 	    continue;
354 	if (n < i)
355 	    SET_VECTOR_ELT(t, n, VECTOR_ELT(t, i));
356 	n++;
357     }
358     nr = n;
359 
360     // Initialize.
361     q = PROTECT(allocVector(INTSXP, nb));
362     r = PROTECT(allocVector(VECSXP, nr));
363     SET_VECTOR_ELT(r, 0, VECTOR_ELT(t, 0));
364     n = 1;
365 #ifdef __DEBUG
366     k = 0;
367     Rprintf("# %7s %7s\n", "iter", "size");
368 #endif
369     for (i = 1; i < nr; i++) {
370 	memset(INTEGER(q), 0, sizeof(int) * nb);
371 	s = VECTOR_ELT(t, i);
372 	for (j = i - 1; j > -1; j--) {
373 	    q0 = VECTOR_ELT(t, j);
374 	    if (!_iss(INTEGER(q0), INTEGER(s), nb))
375 		continue;
376 	    _ior(INTEGER(q0), INTEGER(q), INTEGER(q), nb);
377 	    if (_ieq(INTEGER(s), INTEGER(q), nb))
378 		goto next;
379 	}
380 	SET_VECTOR_ELT(r, n++, s);
381     next:
382 #ifdef __DEBUG
383 	k += i - 1 - j;
384 	Rprintf("# %7i %7i\n", k, n);
385 #endif
386 	R_CheckUserInterrupt();
387     }
388     UNPROTECT_PTR(q);
389     UNPROTECT_PTR(t);
390 
391     // Decode result.
392     s = allocMatrix(LGLSXP, n, nc);
393     for (i = 0; i < n; i++) {
394 	q = VECTOR_ELT(r, i);
395 	// Transform back.
396 	if (INTEGER(R_op)[0] == 2)
397 	    _not(INTEGER(q), INTEGER(q), nb);
398 	for (k = nc - 1; k > -1; k--) {
399 	    j = k % nb;
400 	    LOGICAL(s)[i+k*n] = INTEGER(q)[j] & 1;
401 	    INTEGER(q)[j] >>= 1;
402 	}
403     }
404     UNPROTECT(1);
405 
406     // Set colnames.
407     if (!isNull((q = getAttrib(x, R_DimNamesSymbol)))) {
408 	PROTECT(s);
409 	setAttrib(s, R_DimNamesSymbol, (r = allocVector(VECSXP, 2)));
410 	// FIXME do we need rownames?
411 	SET_VECTOR_ELT(r, 0, R_NilValue);
412 	SET_VECTOR_ELT(r, 1, VECTOR_ELT(q, 1));
413 
414 	UNPROTECT(1);
415     }
416 
417     return s;
418 }
419 
420 //
421