1 /*
2 ##############################################################################
3 # Copyright (c) 2000-2006 All rights reserved
4 # Alberto Reggiori <areggiori@webweaving.org>
5 # Dirk-Willem van Gulik <dirkx@webweaving.org>
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions
9 # are met:
10 #
11 # 1. Redistributions of source code must retain the above copyright
12 # notice, this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in
16 # the documentation and/or other materials provided with the
17 # distribution.
18 #
19 # 3. The end-user documentation included with the redistribution,
20 # if any, must include the following acknowledgment:
21 # "This product includes software developed by
22 # Alberto Reggiori <areggiori@webweaving.org> and
23 # Dirk-Willem van Gulik <dirkx@webweaving.org>."
24 # Alternately, this acknowledgment may appear in the software itself,
25 # if and wherever such third-party acknowledgments normally appear.
26 #
27 # 4. All advertising materials mentioning features or use of this software
28 # must display the following acknowledgement:
29 # This product includes software developed by the University of
30 # California, Berkeley and its contributors.
31 #
32 # 5. Neither the name of the University nor the names of its contributors
33 # may be used to endorse or promote products derived from this software
34 # without specific prior written permission.
35 #
36 # 6. Products derived from this software may not be called "RDFStore"
37 # nor may "RDFStore" appear in their names without prior written
38 # permission.
39 #
40 # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41 # ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
43 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44 # FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
46 # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
47 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
49 # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
50 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
51 # OF THE POSSIBILITY OF SUCH DAMAGE.
52 #
53 # ====================================================================
54 #
55 # This software consists of work developed by Alberto Reggiori and
56 # Dirk-Willem van Gulik. The RDF specific part is based based on public
57 # domain software written at the Stanford University Database Group by
58 # Sergey Melnik. For more information on the RDF API Draft work,
59 # please see <http://www-db.stanford.edu/~melnik/rdf/api.html>
60 # The DBMS TCP/IP server part is based on software originally written
61 # by Dirk-Willem van Gulik for Web Weaving Internet Engineering m/v Enschede,
62 # The Netherlands.
63 #
64 ##############################################################################
65
66 $Id: rdfstore_bits.c,v 1.16 2006/06/19 10:10:21 areggiori Exp $
67 */
68
69 #include <sys/types.h>
70 #include <sys/time.h>
71 #include <stdio.h>
72 #include <stdlib.h>
73 #include <fcntl.h>
74 #include <string.h>
75 #include <assert.h>
76
77 #include "dbms.h"
78 #include "dbms_compat.h"
79
80 #include "rdfstore.h"
81 #include "rdfstore_bits.h"
82 #include "rdfstore_log.h"
83
84 /* within p->size / p->bits
85 * set at -bit- position 'at'
86 * the masked bits to value
87 *
88 * return the modified bits
89 */
90
91 /*
92 * #define RDFSTORE_DEBUG_BITS
93 */
94
rdfstore_bits_setmask(unsigned int * size,unsigned char * bits,unsigned int at,unsigned int mask,unsigned int value,unsigned int max)95 int rdfstore_bits_setmask(
96 unsigned int * size,
97 unsigned char * bits,
98 unsigned int at,
99 unsigned int mask,
100 unsigned int value,
101 unsigned int max
102 ) {
103 register int depth,change;
104
105 if (mask == 0) return(0);
106
107 /* auto extend if needed... */
108 if ( (at/8) >= *size) {
109 unsigned int n=*size;
110 unsigned int s= STEP * ( 1 + at/8/STEP );
111 if (s>max) {
112 fprintf(stderr, "Too many bit=%d byte=%d %d of %d\n",
113 at, at/8, s, max);
114 exit(1);
115 };
116 *size = s;
117 bzero(bits+n, s-n);
118 };
119
120 /* x x x x x x as stored
121 * 0 0 1 1 1 0 mask
122 * 0 0 0 1 1 0 value
123 */
124 mask <<= at % 8;
125 value <<= at % 8;
126 at /= 8;
127 change =0; depth = 0;
128 do {
129 register unsigned char d,c;
130 if (at>=max) {
131 fprintf(stderr,"Uncontrolled overflow %d of %d\n",
132 at, max);
133 exit(1);
134 };
135
136 c = bits[ at ];
137 d = ( c & (~ mask) ) | value;
138
139 if (d != c) {
140 bits[ at ] = d;
141 change |= (d ^ c) << depth;
142 };
143 at ++;
144
145 depth += 8;
146 mask >>= 8;
147 value >>= 8;
148
149 } while ((mask) && (at < *size ));
150
151 return (change);
152 };
153
154 /* Return the record number (bit number / 4) of the
155 * first record from record 'at' onwards which has
156 * a bit set within the mask.
157 */
158 unsigned int
rdfstore_bits_getfirstrecord(unsigned int size,unsigned char * bits,unsigned int at,unsigned char mask)159 rdfstore_bits_getfirstrecord (
160 unsigned int size, /* in bytes */
161 unsigned char * bits, /* bit array */
162 unsigned int at, /* as record no (bits/4) */
163 unsigned char mask /* 0000 to 1111 */
164 ) {
165 unsigned mask2 = mask << 4;
166 unsigned i = at >> 1;
167 unsigned char c = bits[i];
168
169 assert(mask < 16);
170 assert(mask != 0);
171
172 if (at & 1)
173 c &= 0xF0;
174
175 do {
176 if ((c & 0x0f) & mask)
177 return 2*i+0;
178 if ((c & 0xf0) & mask2)
179 return 2*i+1;
180 c = bits[ ++i ];
181 } while (i < size);
182
183 return size*2;
184 }
185
186 /*
187 * rdfstore_bits_isanyset - returns != 0 if any bit in the bitmask is set
188 * in addition it returns the positions of the first bit set in at
189 *
190 */
191
rdfstore_bits_isanyset(unsigned int * size,unsigned char * bits,unsigned int * at,unsigned char mask)192 int rdfstore_bits_isanyset(
193 unsigned int * size,
194 unsigned char * bits,
195 unsigned int * at,
196 unsigned char mask
197 ) {
198 register unsigned rest=0;
199 rest = ( *at % 8 );
200 mask = mask << rest;
201 *at /= 8;
202
203 while ((mask) && (*at < *size)) {
204 register int c = bits[ *at ] & mask;
205 if (c) {
206 (*at) *=8;
207 (*at) += rest;
208 return c;
209 };
210 (*at)++;
211 };
212
213 return 0;
214 };
215
216 /*
217 * returns the first bit set from (and including) at in the
218 * bit array of size bytes. If no bit set is found after
219 * the size-est byte of bits; size*8 is returned (i.e. the number
220 * of * last bit (not byte)+1; starting from zero.
221 *
222 * size in bytes
223 * bits unsigned array of bytes with 8 bits each
224 * at location in bits.
225 * return location in bits (or size*8).
226 *
227 */
rdfstore_bits_getfirstsetafter(unsigned int size,unsigned char * bits,unsigned int at)228 unsigned int rdfstore_bits_getfirstsetafter (
229 unsigned int size,
230 unsigned char * bits,
231 unsigned int at
232 ) {
233 register unsigned int i = at >> 3;
234 register unsigned char c = bits[ i ];
235
236 /* first byte is special; skip over the bits
237 * before 'at'.
238 */
239 c &= ( 0xFF << (at & 0x7 ));
240 do {
241 if (c) {
242 i <<= 3;
243 #define _RX(x) if (c & (1<<x)) return (i+x);
244 _RX(0); _RX(1); _RX(2); _RX(3);
245 _RX(4); _RX(5); _RX(6);
246 return (i+7);
247 #undef _RX
248 }
249 i++;
250 c = bits[i];
251 } while (i < size);
252
253 /* Fail; return bits+1. */
254 return size<<3;
255 }
256
257 /* slightly tricky bin-ops; in that it can cope with 'infinitive
258 * lenght type tricks.. Returns the len of the changed bitseq.
259 */
260
261 /*
262 * exor - Exor's to bitvectors to each other, ba of length la and bb of len lb
263 *
264 * returns result in bc (should be preallocated) and length as function result
265 */
266
rdfstore_bits_exor(unsigned int la,unsigned char * ba,unsigned int lb,unsigned char * bb,unsigned char * bc)267 unsigned int rdfstore_bits_exor (
268 unsigned int la, unsigned char * ba,
269 unsigned int lb, unsigned char * bb,
270 unsigned char * bc
271 )
272 {
273 register unsigned int len,i;
274 /* set in a, but not set in b
275 * a b -> a|b ^ b
276 * 0 0 0 0
277 * 0 1 1 0
278 * 1 0 1 1
279 * 1 1 1 0
280 */
281 #if 0
282 A real EXOR does
283 00 -> 0
284 10 -> 1
285 01 -> 1
286 11 -> 0
287 #endif
288 for(len=0,i=0; (i<la) || (i<lb); i++) {
289 register unsigned char a = ( i>=la ) ? 0 : ba[i];
290 register unsigned char b = ( i>=lb ) ? 0 : bb[i];
291 #if 0
292 /* real exor */
293 register unsigned char c = a ^ b;
294 #endif
295 register unsigned char c = (a | b) ^ b;
296 if (c) len = i+1;
297 bc[i] = c;
298 };
299 return len;
300 };
301
302 /*
303 * or - Or's to bitvectors to each other, ba of length la and bb of length lb
304 *
305 * returns result in bc (should be preallocated) and length as function result
306 */
307
rdfstore_bits_or(unsigned int la,unsigned char * ba,unsigned int lb,unsigned char * bb,unsigned char * bc)308 unsigned int rdfstore_bits_or (
309 unsigned int la, unsigned char * ba,
310 unsigned int lb, unsigned char * bb,
311 unsigned char * bc
312 )
313 {
314 register unsigned int len,i;
315 for(len=0,i=0; (i<la) || (i<lb); i++) {
316 register unsigned char a = ( i>=la ) ? 0 : ba[i];
317 register unsigned char b = ( i>=lb ) ? 0 : bb[i];
318 register unsigned char c = (a | b);
319 if (c) len = i+1;
320 bc[i] = c;
321 };
322 return len;
323 };
324
325
326 /*
327 * and - And's to bitvectors to each other, ba of length la and bb of length lb
328 *
329 * returns result in bc (should be preallocated) and length as function result
330 */
331
rdfstore_bits_and(unsigned int la,unsigned char * ba,unsigned int lb,unsigned char * bb,unsigned char * bc)332 unsigned int rdfstore_bits_and (
333 unsigned int la, unsigned char * ba,
334 unsigned int lb, unsigned char * bb,
335 unsigned char * bc
336 )
337 {
338 register unsigned int len,i;
339 for(len=0,i=0; (i<la) && (i<lb); i++) {
340 register unsigned char a = ( i>=la ) ? 0 : ba[i];
341 register unsigned char b = ( i>=lb ) ? 0 : bb[i];
342 register unsigned char c = (a & b);
343 if (c) len = i+1;
344 bc[i] = c;
345 };
346
347 return len;
348 };
349
350 /*
351 * not - Not's a bitvector ba of length la
352 *
353 * returns result in bb (should be preallocated) and length as function result
354 */
355
rdfstore_bits_not(unsigned int la,unsigned char * ba,unsigned char * bb)356 unsigned int rdfstore_bits_not (
357 unsigned int la, unsigned char * ba,
358 unsigned char * bb
359 )
360 {
361 register unsigned int len,i;
362 for(len=0,i=0; (i<la) ; i++) {
363 register unsigned char a = ( i>=la ) ? 0 : ba[i];
364 register unsigned char b = ~ a;
365 if (b) len = i+1;
366 bb[i] = b;
367 };
368
369 return len;
370 };
371
372
373 /*
374 * shorten - removes the top zero bits of the bitvector
375 *
376 * returns length of bitvector (without trailing zeroes) as bytes as function
377 * result
378 */
379
rdfstore_bits_shorten(unsigned int la,unsigned char * ba)380 unsigned int rdfstore_bits_shorten(
381 unsigned int la, unsigned char * ba
382 )
383 {
384 while( ( la >0 ) && (ba[la-1] == 0) ) la--;
385 return(la);
386 };
387
388 /* n = 6 - size of a record.
389 * A = row of records; at 1 bit wide.
390 * lenght in bytes, not bits !
391 * B = row of records; each 6 bits wide.
392 * lenght in bytes, not bits !
393 * M = mask of 6 bits.
394 * no lenght
395 * OUT:
396 * bc filled
397 * returns number of bytes in use.
398 *
399 */
rdfstore_bits_and2(int n,unsigned int la,unsigned char * ba,unsigned int lb,unsigned char * bb,unsigned char mask,unsigned char * bc)400 unsigned int rdfstore_bits_and2(
401 int n,
402 unsigned int la, unsigned char * ba,
403 unsigned int lb, unsigned char * bb,
404 unsigned char mask,
405 unsigned char * bc
406 )
407 {
408 unsigned int i = 0;
409 int endbit = la * 8;
410 assert(n <= 8); /* up to 8 bits - see q+1 below */
411 assert(mask < (1<<n)); /* Mask cannot be bigger than N bits */
412
413 bzero(bc,la); /* Out array of length A max */
414
415 /* If B has less records than A has bits; shorten A
416 */
417 if (lb * 8 / n < endbit)
418 endbit = (lb * 8 / n) * 8;
419
420 #ifdef RDFSTORE_DEBUG_BITS
421 {
422 int i,j=0;
423 printf("rdfstore_bits_and2 la=%d lb=%d endbit=%d endbyte=%d\n",(int)la,(int)lb,endbit,endbit/8);
424 printf("rdfstore_bits_and2 ba -->'");
425 for(i=0;i<8*la;i++) {
426 printf("Rec %d %c\n", i,(ba[i>>3] & (1<<(i&7))) ? '1':'0');
427 };
428 printf("'\n");
429 printf("rdfstore_bits_and2 bb -->'");
430 for(i=0;i<8*lb;i++) {
431 if (i % n == 0) {
432 int a = 0;
433 if (j<8*la) a= ba[j>>3] & (1<<(j&7));
434 printf("Rec %d A=%d ",j,a ? '1':'0');
435 j++;
436 };
437 printf("%c", (bb[i>>3] & (1<<(i&7))) ? '1':'0');
438 if (i % n == n-1) printf("\n");
439 };
440 printf("'\n");
441 printf("rdfstore_bits_and2 mask -->'");
442 for(i=0;i<8;i++) {
443 printf("%c", (mask & (1<<(i&7))) ? '1':'0');
444 };
445 printf("'\n");
446 }
447 #endif
448
449 for(i=0; i < endbit ; i++) {
450 /* Check if bit 'i' is set or not
451 */
452 if (ba[ i>>3 ] & (1<<(i & 7))) {
453 unsigned int p = n * i; /* bit number where the record starts */
454 unsigned int q = p >> 3; /* byte number. */
455 unsigned int r = p & 7; /* bit offset in the byte */
456 unsigned int record;
457
458 /* fetch N bits from the B. Note 8 bits max now; if we have
459 * records of more than 8 bits; then add q + 2.
460 */
461 record = (((bb[ q + 1 ] << 8) + bb[ q ]) >> (r));
462
463 /* If there is one or more bits in the record set; within
464 * the mask; set a bit at recno in the output.
465 */
466
467 if (record & mask) /* and2 */
468 bc[ i >> 3 ] |= (1 << ( i & 7));
469 };
470 };
471
472 #ifdef RDFSTORE_DEBUG_BITS
473 {
474 int j;
475 printf("rdfstore_bits_or2 bc -->'");
476 for(j=0;j<8*(i>>3);j++) {
477 printf("Rec %d %c\n", j,(bc[j>>3] & (1<<(j&7))) ? '1':'0');
478 };
479 printf("'\n");
480 };
481 #endif
482
483 /* Return the lenght in bytes, not bits */
484 return i >> 3;
485 };
486
487 /* n = 6 - size of a record.
488 * A = row of records; at 1 bit wide.
489 * lenght in bytes, not bits !
490 * B = row of records; each 6 bits wide.
491 * lenght in bytes, not bits !
492 * M = mask of 6 bits.
493 * no lenght
494 * OUT:
495 * bc filled
496 * returns number of bytes in use.
497 *
498 */
rdfstore_bits_or2(int n,unsigned int la,unsigned char * ba,unsigned int lb,unsigned char * bb,unsigned char mask,unsigned char * bc)499 unsigned int rdfstore_bits_or2(
500 int n,
501 unsigned int la, unsigned char * ba,
502 unsigned int lb, unsigned char * bb,
503 unsigned char mask,
504 unsigned char * bc
505 )
506 {
507 unsigned int i = 0;
508 int endbit = la * 8;
509 assert(n <= 8); /* up to 8 bits - see q+1 below */
510 assert(mask < (1<<n)); /* Mask cannot be bigger than N bits */
511
512 bzero(bc,la); /* Out array of length A max */
513
514 /* If B has less records than A has bits; shorten A
515 */
516 if (lb * 8 / n < endbit)
517 endbit = (lb * 8 / n) * 8;
518
519 #ifdef RDFSTORE_DEBUG_BITS
520 {
521 int i,j=0;
522 printf("rdfstore_bits_or2 la=%d lb=%d endbit=%d endbyte=%d\n",(int)la,(int)lb,endbit,endbit/8);
523 printf("rdfstore_bits_or2 ba -->'");
524 for(i=0;i<8*la;i++) {
525 printf("Rec %d %c\n", i,(ba[i>>3] & (1<<(i&7))) ? '1':'0');
526 };
527 printf("'\n");
528 printf("rdfstore_bits_or2 bb -->'");
529 for(i=0;i<8*lb;i++) {
530 if (i % n == 0) {
531 int a = 0;
532 if (j<8*la) a= ba[j>>3] & (1<<(j&7));
533 printf("Rec %d A=%d ",j,a ? '1':'0');
534 j++;
535 };
536 printf("%c", (bb[i>>3] & (1<<(i&7))) ? '1':'0');
537 if (i % n == n-1) printf("\n");
538 };
539 printf("'\n");
540 printf("rdfstore_bits_or2 mask -->'");
541 for(i=0;i<8;i++) {
542 printf("%c", (mask & (1<<(i&7))) ? '1':'0');
543 };
544 printf("'\n");
545 }
546 #endif
547
548 for(i=0; i < endbit ; i++) {
549 unsigned int p = n * i; /* bit number where the record starts */
550 unsigned int q = p >> 3; /* byte number. */
551 unsigned int r = p & 7; /* bit offset in the byte */
552 unsigned int record;
553
554 /* fetch N bits from the B. Note 8 bits max now; if we have
555 * records of more than 8 bits; then add q + 2.
556 */
557 record = (((bb[ q + 1 ] << 8) + bb[ q ]) >> (r));
558
559 /* If there is one or more bits in the record set; within
560 * the mask; set a bit at recno in the output.
561 */
562
563 if ( (ba[ i>>3 ] & (1<<(i & 7))) | (record & mask) ) /* or2 */
564 bc[ i >> 3 ] |= (1 << ( i & 7));
565 };
566
567 #ifdef RDFSTORE_DEBUG_BITS
568 {
569 int j;
570 printf("rdfstore_bits_or2 bc -->'");
571 for(j=0;j<8*(i>>3);j++) {
572 printf("Rec %d %c\n", j,(bc[j>>3] & (1<<(j&7))) ? '1':'0');
573 };
574 printf("'\n");
575 };
576 #endif
577
578 /* Return the lenght in bytes, not bits */
579 return i >> 3;
580 };
581