1 /*
2 ##############################################################################
3 # 	Copyright (c) 2000-2006 All rights reserved
4 # 	Alberto Reggiori <areggiori@webweaving.org>
5 #	Dirk-Willem van Gulik <dirkx@webweaving.org>
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions
9 # are met:
10 #
11 # 1. Redistributions of source code must retain the above copyright
12 #    notice, this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 #    notice, this list of conditions and the following disclaimer in
16 #    the documentation and/or other materials provided with the
17 #    distribution.
18 #
19 # 3. The end-user documentation included with the redistribution,
20 #    if any, must include the following acknowledgment:
21 #       "This product includes software developed by
22 #        Alberto Reggiori <areggiori@webweaving.org> and
23 #        Dirk-Willem van Gulik <dirkx@webweaving.org>."
24 #    Alternately, this acknowledgment may appear in the software itself,
25 #    if and wherever such third-party acknowledgments normally appear.
26 #
27 # 4. All advertising materials mentioning features or use of this software
28 #    must display the following acknowledgement:
29 #    This product includes software developed by the University of
30 #    California, Berkeley and its contributors.
31 #
32 # 5. Neither the name of the University nor the names of its contributors
33 #    may be used to endorse or promote products derived from this software
34 #    without specific prior written permission.
35 #
36 # 6. Products derived from this software may not be called "RDFStore"
37 #    nor may "RDFStore" appear in their names without prior written
38 #    permission.
39 #
40 # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41 # ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
43 # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44 # FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
46 # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
47 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
49 # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
50 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
51 # OF THE POSSIBILITY OF SUCH DAMAGE.
52 #
53 # ====================================================================
54 #
55 # This software consists of work developed by Alberto Reggiori and
56 # Dirk-Willem van Gulik. The RDF specific part is based based on public
57 # domain software written at the Stanford University Database Group by
58 # Sergey Melnik. For more information on the RDF API Draft work,
59 # please see <http://www-db.stanford.edu/~melnik/rdf/api.html>
60 # The DBMS TCP/IP server part is based on software originally written
61 # by Dirk-Willem van Gulik for Web Weaving Internet Engineering m/v Enschede,
62 # The Netherlands.
63 #
64 ##############################################################################
65 
66  $Id: rdfstore_bits.c,v 1.16 2006/06/19 10:10:21 areggiori Exp $
67 */
68 
69 #include <sys/types.h>
70 #include <sys/time.h>
71 #include <stdio.h>
72 #include <stdlib.h>
73 #include <fcntl.h>
74 #include <string.h>
75 #include <assert.h>
76 
77 #include "dbms.h"
78 #include "dbms_compat.h"
79 
80 #include "rdfstore.h"
81 #include "rdfstore_bits.h"
82 #include "rdfstore_log.h"
83 
84 /* within p->size / p->bits
85  * set at -bit- position 'at'
86  * the masked bits to value
87  *
88  * return the modified bits
89  */
90 
91 /*
92  * #define RDFSTORE_DEBUG_BITS
93  */
94 
rdfstore_bits_setmask(unsigned int * size,unsigned char * bits,unsigned int at,unsigned int mask,unsigned int value,unsigned int max)95 int rdfstore_bits_setmask(
96 	unsigned int * size,
97 	unsigned char * bits,
98 	unsigned int at,
99 	unsigned int mask,
100 	unsigned int value,
101 	unsigned int max
102 ) {
103 	register int depth,change;
104 
105 	if (mask == 0) return(0);
106 
107 	/* auto extend if needed... */
108 	if ( (at/8) >= *size) {
109 		unsigned int n=*size;
110 		unsigned int s= STEP * ( 1 + at/8/STEP );
111 		if (s>max) {
112 			fprintf(stderr, "Too many bit=%d byte=%d %d of %d\n",
113 				at, at/8, s, max);
114 			exit(1);
115 			};
116 		*size = s;
117 		bzero(bits+n, s-n);
118 		};
119 
120 	/*	x x x x x x as stored
121 	 *	0 0 1 1 1 0 mask
122   	 *	0 0 0 1 1 0 value
123 	 */
124 	mask <<= at % 8;
125 	value <<= at % 8;
126 	at /= 8;
127 	change =0; depth = 0;
128 	do {
129 		register unsigned char d,c;
130 		if (at>=max) {
131 			fprintf(stderr,"Uncontrolled overflow %d of %d\n",
132 				at, max);
133 			exit(1);
134 			};
135 
136 		c = bits[ at ];
137 		d = ( c & (~ mask) ) | value;
138 
139 		if (d != c) {
140 			bits[ at ] = d;
141 			change |= (d ^ c) << depth;
142 			};
143 		at ++;
144 
145 		depth += 8;
146 		mask >>= 8;
147 		value >>= 8;
148 
149 		} while ((mask) && (at < *size ));
150 
151 	return (change);
152 	};
153 
154 /* Return the record number (bit number / 4) of the
155  * first record from record 'at' onwards which has
156  * a bit set within the mask.
157  */
158 unsigned int
rdfstore_bits_getfirstrecord(unsigned int size,unsigned char * bits,unsigned int at,unsigned char mask)159 rdfstore_bits_getfirstrecord (
160         unsigned int size,	/* in bytes */
161         unsigned char * bits,	/* bit array */
162         unsigned int at, 	/* as record no (bits/4) */
163         unsigned char mask	/* 0000 to 1111 */
164 ) {
165 	unsigned mask2 = mask << 4;
166 	unsigned i = at >> 1;
167 	unsigned char c = bits[i];
168 
169 	assert(mask < 16);
170 	assert(mask != 0);
171 
172 	if (at & 1)
173 		c &= 0xF0;
174 
175 	do {
176 		if ((c & 0x0f) & mask)
177 			return 2*i+0;
178 		if ((c & 0xf0) & mask2)
179 			return 2*i+1;
180 		c = bits[ ++i ];
181 	} while (i < size);
182 
183 	return size*2;
184 }
185 
186 /*
187  * rdfstore_bits_isanyset - returns != 0 if any bit in the bitmask is set
188  * in addition it returns the positions of the first bit set in at
189  *
190  */
191 
rdfstore_bits_isanyset(unsigned int * size,unsigned char * bits,unsigned int * at,unsigned char mask)192 int rdfstore_bits_isanyset(
193         unsigned int * size,
194         unsigned char * bits,
195         unsigned int * at,
196         unsigned char mask
197 ) {
198 	register unsigned rest=0;
199 	rest = ( *at % 8 );
200         mask = mask << rest;
201         *at /= 8;
202 
203         while ((mask) && (*at < *size)) {
204                 register int c = bits[ *at ] & mask;
205                 if (c) {
206 			(*at) *=8;
207 			(*at) += rest;
208 			return c;
209 			};
210                 (*at)++;
211                 };
212 
213         return 0;
214         };
215 
216 /*
217  * returns the first bit set from (and including) at in the
218  * bit array of size bytes. If no bit set is found after
219  * the size-est byte of bits; size*8 is returned (i.e. the number
220  * of * last bit (not byte)+1; starting from zero.
221  *
222  * size		in bytes
223  * bits		unsigned array of bytes with 8 bits each
224  * at		location in bits.
225  * return	location in bits (or size*8).
226  *
227  */
rdfstore_bits_getfirstsetafter(unsigned int size,unsigned char * bits,unsigned int at)228 unsigned int rdfstore_bits_getfirstsetafter (
229         unsigned int size,
230         unsigned char * bits,
231         unsigned int at
232 ) {
233         register unsigned int i = at >> 3;
234         register unsigned char c = bits[ i ];
235 
236         /* first byte is special; skip over the bits
237          * before 'at'.
238          */
239         c &= ( 0xFF << (at & 0x7 ));
240         do {
241                 if (c) {
242 			i <<= 3;
243 #define _RX(x) 		if (c & (1<<x)) return (i+x);
244 			_RX(0); _RX(1); _RX(2); _RX(3);
245 			_RX(4); _RX(5); _RX(6);
246 			return (i+7);
247 #undef _RX
248                 }
249                 i++;
250                 c = bits[i];
251         } while (i < size);
252 
253 	/* Fail; return bits+1. */
254         return size<<3;
255 }
256 
257 /* slightly tricky bin-ops; in that it can cope with 'infinitive
258  * lenght type tricks.. Returns the len of the changed bitseq.
259  */
260 
261 /*
262  * exor - Exor's to bitvectors to each other, ba of length la and bb of len lb
263  *
264  * returns result in bc (should be preallocated) and length as function result
265  */
266 
rdfstore_bits_exor(unsigned int la,unsigned char * ba,unsigned int lb,unsigned char * bb,unsigned char * bc)267 unsigned int rdfstore_bits_exor (
268 	unsigned int la, unsigned char * ba,
269 	unsigned int lb, unsigned char * bb,
270 	unsigned char * bc
271 	)
272 {
273 	register unsigned int len,i;
274 	/* set in a, but not set in b
275 	 * a b -> a|b ^ b
276          * 0 0    0     0
277 	 * 0 1    1     0
278 	 * 1 0    1     1
279 	 * 1 1    1     0
280 	 */
281 #if 0
282 	A real EXOR does
283 	 00 -> 0
284 	 10 -> 1
285 	 01 -> 1
286 	 11 -> 0
287 #endif
288 	for(len=0,i=0; (i<la) || (i<lb); i++) {
289 		register unsigned char a = ( i>=la ) ? 0 : ba[i];
290 		register unsigned char b = ( i>=lb ) ? 0 : bb[i];
291 #if 0
292 		/* real exor */
293 	  	register unsigned char c = a ^ b;
294 #endif
295 	  	register unsigned char c = (a | b) ^ b;
296 	  	if (c) len = i+1;
297 		bc[i] = c;
298 		};
299 	return len;
300 	};
301 
302 /*
303  * or - Or's to bitvectors to each other, ba of length la and bb of length lb
304  *
305  * returns result in bc (should be preallocated) and length as function result
306  */
307 
rdfstore_bits_or(unsigned int la,unsigned char * ba,unsigned int lb,unsigned char * bb,unsigned char * bc)308 unsigned int rdfstore_bits_or (
309 	unsigned int la, unsigned char * ba,
310 	unsigned int lb, unsigned char * bb,
311 	unsigned char * bc
312 	)
313 {
314 	register unsigned int len,i;
315 	for(len=0,i=0; (i<la) || (i<lb); i++) {
316 		register unsigned char a = ( i>=la ) ? 0 : ba[i];
317 		register unsigned char b = ( i>=lb ) ? 0 : bb[i];
318 	  	register unsigned char c = (a | b);
319 	  	if (c) len = i+1;
320 		bc[i] = c;
321 		};
322 	return len;
323 	};
324 
325 
326 /*
327  * and - And's to bitvectors to each other, ba of length la and bb of length lb
328  *
329  * returns result in bc (should be preallocated) and length as function result
330  */
331 
rdfstore_bits_and(unsigned int la,unsigned char * ba,unsigned int lb,unsigned char * bb,unsigned char * bc)332 unsigned int rdfstore_bits_and (
333 	unsigned int la, unsigned char * ba,
334 	unsigned int lb, unsigned char * bb,
335 	unsigned char * bc
336 	)
337 {
338 	register unsigned int len,i;
339 	for(len=0,i=0; (i<la) && (i<lb); i++) {
340 		register unsigned char a = ( i>=la ) ? 0 : ba[i];
341 		register unsigned char b = ( i>=lb ) ? 0 : bb[i];
342 	  	register unsigned char c = (a & b);
343 	  	if (c) len = i+1;
344 		bc[i] = c;
345 		};
346 
347 	return len;
348 	};
349 
350 /*
351  * not - Not's a bitvector ba of length la
352  *
353  * returns result in bb (should be preallocated) and length as function result
354  */
355 
rdfstore_bits_not(unsigned int la,unsigned char * ba,unsigned char * bb)356 unsigned int rdfstore_bits_not (
357 	unsigned int la, unsigned char * ba,
358 	unsigned char * bb
359 	)
360 {
361 	register unsigned int len,i;
362 	for(len=0,i=0; (i<la) ; i++) {
363 		register unsigned char a = ( i>=la ) ? 0 : ba[i];
364 	  	register unsigned char b = ~ a;
365 	  	if (b) len = i+1;
366 		bb[i] = b;
367 		};
368 
369 	return len;
370 	};
371 
372 
373 /*
374  * shorten - removes the top zero bits of the bitvector
375  *
376  * returns length of bitvector (without trailing zeroes) as bytes as function
377  * result
378  */
379 
rdfstore_bits_shorten(unsigned int la,unsigned char * ba)380 unsigned int rdfstore_bits_shorten(
381 	unsigned int la, unsigned char * ba
382 	)
383 {
384 	while( ( la >0 ) && (ba[la-1] == 0) ) la--;
385 	return(la);
386 	};
387 
388 /* n = 6 - size of a record.
389  * A = row of records; at 1 bit wide.
390  * 	lenght in bytes, not bits !
391  * B = row of records; each 6 bits wide.
392  * 	lenght in bytes, not bits !
393  * M = mask of 6 bits.
394  * 	no lenght
395  * OUT:
396  *	bc filled
397  *	returns number of bytes in use.
398  *
399  */
rdfstore_bits_and2(int n,unsigned int la,unsigned char * ba,unsigned int lb,unsigned char * bb,unsigned char mask,unsigned char * bc)400 unsigned int rdfstore_bits_and2(
401 	int n,
402 	unsigned int la, unsigned char * ba,
403 	unsigned int lb, unsigned char * bb,
404 	unsigned char mask,
405 	unsigned char * bc
406 	)
407 {
408 	unsigned int i = 0;
409 	int endbit = la * 8;
410 	assert(n <= 8);			/* up to 8 bits - see q+1 below */
411 	assert(mask < (1<<n));		/* Mask cannot be bigger than N bits */
412 
413 	bzero(bc,la);		/* Out array of length A max */
414 
415 	/* If B has less records than A has bits; shorten A
416 	 */
417 	if (lb * 8 / n < endbit)
418 		endbit = (lb * 8 / n) * 8;
419 
420 #ifdef RDFSTORE_DEBUG_BITS
421 {
422 int             i,j=0;
423 printf("rdfstore_bits_and2 la=%d lb=%d endbit=%d endbyte=%d\n",(int)la,(int)lb,endbit,endbit/8);
424 printf("rdfstore_bits_and2 ba -->'");
425 for(i=0;i<8*la;i++) {
426 	printf("Rec %d %c\n", i,(ba[i>>3] & (1<<(i&7))) ? '1':'0');
427         };
428 printf("'\n");
429 printf("rdfstore_bits_and2 bb -->'");
430 for(i=0;i<8*lb;i++) {
431 	if (i % n == 0) {
432 		int a = 0;
433 		if (j<8*la) a= ba[j>>3] & (1<<(j&7));
434 		printf("Rec %d A=%d ",j,a ? '1':'0');
435 		j++;
436 	};
437 	printf("%c", (bb[i>>3] & (1<<(i&7))) ? '1':'0');
438 	if (i % n == n-1) printf("\n");
439         };
440 printf("'\n");
441 printf("rdfstore_bits_and2 mask -->'");
442 for(i=0;i<8;i++) {
443 	printf("%c", (mask & (1<<(i&7))) ? '1':'0');
444         };
445 printf("'\n");
446 }
447 #endif
448 
449 	for(i=0; i < endbit ; i++) {
450 		/* Check if bit 'i' is set or not
451 		 */
452 		if (ba[ i>>3 ] & (1<<(i & 7))) {
453 			unsigned int p = n * i;		/* bit number where the record starts */
454 			unsigned int q = p >> 3;	/* byte number. */
455 			unsigned int r = p & 7;		/* bit offset in the byte */
456 			unsigned int record;
457 
458 			/* fetch N bits from the B. Note 8 bits max now; if we have
459 			 * records of more than 8 bits; then add q + 2.
460 			 */
461 			record = (((bb[ q + 1 ] << 8) + bb[ q ]) >> (r));
462 
463 			/* If there is one or more bits in the record set; within
464 			 * the mask; set a bit at recno in the output.
465 			 */
466 
467 			if (record & mask) /* and2 */
468 				bc[ i >> 3 ] |= (1 << ( i & 7));
469 			};
470 		};
471 
472 #ifdef RDFSTORE_DEBUG_BITS
473 {
474 int  j;
475 printf("rdfstore_bits_or2 bc -->'");
476 for(j=0;j<8*(i>>3);j++) {
477 	printf("Rec %d %c\n", j,(bc[j>>3] & (1<<(j&7))) ? '1':'0');
478         };
479 printf("'\n");
480 };
481 #endif
482 
483 	/* Return the lenght in bytes, not bits */
484 	return i >> 3;
485 	};
486 
487 /* n = 6 - size of a record.
488  * A = row of records; at 1 bit wide.
489  * 	lenght in bytes, not bits !
490  * B = row of records; each 6 bits wide.
491  * 	lenght in bytes, not bits !
492  * M = mask of 6 bits.
493  * 	no lenght
494  * OUT:
495  *	bc filled
496  *	returns number of bytes in use.
497  *
498  */
rdfstore_bits_or2(int n,unsigned int la,unsigned char * ba,unsigned int lb,unsigned char * bb,unsigned char mask,unsigned char * bc)499 unsigned int rdfstore_bits_or2(
500 	int n,
501 	unsigned int la, unsigned char * ba,
502 	unsigned int lb, unsigned char * bb,
503 	unsigned char mask,
504 	unsigned char * bc
505 	)
506 {
507 	unsigned int i = 0;
508 	int endbit = la * 8;
509 	assert(n <= 8);			/* up to 8 bits - see q+1 below */
510 	assert(mask < (1<<n));		/* Mask cannot be bigger than N bits */
511 
512 	bzero(bc,la);		/* Out array of length A max */
513 
514 	/* If B has less records than A has bits; shorten A
515 	 */
516 	if (lb * 8 / n < endbit)
517 		endbit = (lb * 8 / n) * 8;
518 
519 #ifdef RDFSTORE_DEBUG_BITS
520 {
521 int             i,j=0;
522 printf("rdfstore_bits_or2 la=%d lb=%d endbit=%d endbyte=%d\n",(int)la,(int)lb,endbit,endbit/8);
523 printf("rdfstore_bits_or2 ba -->'");
524 for(i=0;i<8*la;i++) {
525 	printf("Rec %d %c\n", i,(ba[i>>3] & (1<<(i&7))) ? '1':'0');
526         };
527 printf("'\n");
528 printf("rdfstore_bits_or2 bb -->'");
529 for(i=0;i<8*lb;i++) {
530 	if (i % n == 0) {
531 		int a = 0;
532 		if (j<8*la) a= ba[j>>3] & (1<<(j&7));
533 		printf("Rec %d A=%d ",j,a ? '1':'0');
534 		j++;
535 	};
536 	printf("%c", (bb[i>>3] & (1<<(i&7))) ? '1':'0');
537 	if (i % n == n-1) printf("\n");
538         };
539 printf("'\n");
540 printf("rdfstore_bits_or2 mask -->'");
541 for(i=0;i<8;i++) {
542 	printf("%c", (mask & (1<<(i&7))) ? '1':'0');
543         };
544 printf("'\n");
545 }
546 #endif
547 
548 	for(i=0; i < endbit ; i++) {
549 		unsigned int p = n * i;		/* bit number where the record starts */
550 		unsigned int q = p >> 3;	/* byte number. */
551 		unsigned int r = p & 7;		/* bit offset in the byte */
552 		unsigned int record;
553 
554 		/* fetch N bits from the B. Note 8 bits max now; if we have
555 		 * records of more than 8 bits; then add q + 2.
556 		 */
557 		record = (((bb[ q + 1 ] << 8) + bb[ q ]) >> (r));
558 
559 		/* If there is one or more bits in the record set; within
560 		 * the mask; set a bit at recno in the output.
561 		 */
562 
563 		if ( (ba[ i>>3 ] & (1<<(i & 7))) | (record & mask) ) /* or2 */
564 			bc[ i >> 3 ] |= (1 << ( i & 7));
565 		};
566 
567 #ifdef RDFSTORE_DEBUG_BITS
568 {
569 int  j;
570 printf("rdfstore_bits_or2 bc -->'");
571 for(j=0;j<8*(i>>3);j++) {
572         printf("Rec %d %c\n", j,(bc[j>>3] & (1<<(j&7))) ? '1':'0');
573         };
574 printf("'\n");
575 };
576 #endif
577 
578 	/* Return the lenght in bytes, not bits */
579 	return i >> 3;
580 	};
581