1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #ifndef _h_noarch_bitstr_
28 #define _h_noarch_bitstr_
29 
30 #ifndef _h_bitstr_
31 #error "don't include <noarch/bitstr.h> directly - use <bitstr.h>"
32 #endif
33 
34 #ifndef _h_klib_defs_
35 #include <klib/defs.h>
36 #endif
37 
38 #include <byteswap.h>
39 
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43 
44 /* bitcpy
45  *  copy a string of bits from source to dest
46  *
47  *  both source and dest may have non-byte aligned pointers
48  *  the number of bits to copy need not be byte aligned
49  *
50  *  depending upon architecture and OS conventions, the word
51  *  size may be adjusted to 1, 2, or 4 bytes, where the base
52  *  pointers are always word aligned.
53  *
54  *  bits in memory are always treated as big-endian, meaning
55  *  that on multi-byte fetches and stores, we perform byte-swapping
56  *  if there are shifts or masks
57  */
58 static __inline__
bitcpy(void * dbase,bitsz_t doff,const void * sbase,bitsz_t soff,bitsz_t sz)59 void bitcpy ( void *dbase, bitsz_t doff, const void *sbase, bitsz_t soff, bitsz_t sz )
60 {
61     /* noop if sz == 0 */
62     if ( sz != 0 )
63     {
64         /* loop counter and destination word count */
65         size_t i, dcountz;
66 
67         /* left & right masks and working register */
68         WRD lmask, rmask, reg;
69 
70         /* produce word-aligned pointers */
71 #if WRDSIZE == 8
72         /* 1-4. all at once */
73         WRD *dst = ( WRD* ) dbase + ( doff >> WRDSHIFT );
74         const WRD *src = ( const WRD* ) sbase + ( soff >> WRDSHIFT );
75 #else
76         /* 1. capture word alignment adjustment */
77         size_t dadjust = ( size_t ) dbase & ( WRDSIZE / 8 - 1 );
78         size_t sadjust = ( size_t ) sbase & ( WRDSIZE / 8 - 1 );
79 
80         /* 2. create word-aligned pointers */
81         WRD *dst = ( WRD* ) ( ( size_t ) dbase - dadjust );
82         const WRD *src = ( const WRD* ) ( ( size_t ) sbase - sadjust );
83 
84         /* 3. incorporate alignment adjustment into offset bits */
85         doff += dadjust << 3;
86         soff += sadjust << 3;
87 
88         /* 4. readjust pointers based upon offset */
89         dst += doff >> WRDSHIFT;
90         src += soff >> WRDSHIFT;
91 #endif
92         /* 5. restate offsets */
93         doff &= ( WRDSIZE - 1 );
94         soff &= ( WRDSIZE - 1 );
95 
96         /* calculate number of words - 1 in dst */
97         dcountz = ( doff + sz + ( WRDSIZE - 1 ) - WRDSIZE ) >> WRDSHIFT;
98 
99         /* calculate masks */
100         lmask = rmask = ~ 0;
101         lmask >>= doff;
102         rmask >>= ( doff + sz ) & ( WRDSIZE - 1 );
103         if ( ( WRD ) ( rmask + 1 ) == 0 )
104             rmask = 0;
105 
106         /* prime register with masked dst [ 0 ] */
107         reg = BSWAP ( dst [ 0 ] ) & ~ lmask;
108 
109         /* if source and destination are aligned */
110         if ( doff == soff )
111         {
112             /* merge src [ 0 ] into reg through mask */
113             reg |= BSWAP ( src [ 0 ] ) & lmask;
114 
115 #if WRDSIZE > 8
116             /* straight copies don't need byteswap
117                other than on first and last words
118                put first word back into little-endian
119                for remainder of loop */
120             if ( dcountz > 0 )
121             {
122                 reg = BSWAP ( reg );
123 #endif
124                 /* aligned buffers have n:n word ratio */
125                 for ( i = 0; i < dcountz; )
126                 {
127                     dst [ i ] = reg;
128                     reg = src [ ++ i ];
129                 }
130 
131 #if WRDSIZE > 8
132                 /* revert to big-endian */
133                 reg = BSWAP ( reg );
134             }
135 #endif
136         }
137 
138         /* shifting alignment  */
139         else
140         {
141             /* source count may differ from dest count */
142             size_t scountz = ( soff + sz + ( WRDSIZE - 1 ) - WRDSIZE ) >> WRDSHIFT;
143 
144             /* use double-word accumulator */
145             ACC acc = BSWAP ( src [ 0 ] );
146 
147             /* shift amount */
148             int shift = ( int ) doff - ( int ) soff;
149             if ( shift > 0 )
150             {
151                 /* take only valid bits in shifted initial src */
152                 reg |= ( WRD ) ( acc >> shift ) & lmask;
153 
154                 /* because "shift" > 0, we know "dcountz" >= "scountz" */
155                 for ( acc <<= WRDSIZE, i = 0; i < scountz; acc <<= WRDSIZE )
156                 {
157                     dst [ i ] = BSWAP ( reg );
158                     ++ i;
159                     acc |= BSWAP ( src [ i ] );
160                     reg = ( WRD ) ( acc >> shift );
161                 }
162 
163                 /* if "dcountz" > "scountz" */
164                 if ( i < dcountz )
165                 {
166                     dst [ i ] = BSWAP ( reg );
167                     reg = ( WRD ) ( acc >> shift );
168                 }
169             }
170 
171             else
172             {
173                 /* need single word read-ahead and right-shift */
174                 shift += WRDSIZE;
175 
176                 /* because "shift" was < 0, we know "dcountz" <= "scountz" */
177                 for ( acc <<= WRDSIZE, i = 0; i < dcountz; acc <<= WRDSIZE )
178                 {
179                     acc |= BSWAP ( src [ i + 1 ] );
180                     reg |= ( WRD ) ( acc >> shift ) & lmask;
181                     dst [ i ++ ] = BSWAP ( reg );
182                     lmask = ~ 0;
183                     reg = 0;
184                 }
185 
186                 /* if "dcountz" < "scountz" */
187                 if ( i < scountz )
188                     acc |= BSWAP ( src [ scountz ] );
189 
190                 reg |= ( WRD ) ( acc >> shift ) & lmask;
191             }
192         }
193 
194         /* mask off unused bytes from src */
195         reg &= ~ rmask;
196 
197         /* bring in saved bits from dst */
198         reg |= BSWAP ( dst [ dcountz ] ) & rmask;
199 
200         /* write out last word */
201         dst [ dcountz ] = BSWAP ( reg );
202     }
203 }
204 
205 /* bitcmp
206  *  performs bitwise a - b, returning result as int
207  *  result value has no meaning, only sign
208  *  where < 0 means a < b, > 0 means a > b, and 0 means a == b
209  *
210  *  since the comparison produces a tri-state indicator of
211  *  relative magnitude, the order of "a" and "b" is important.
212  *  furthermore, the difference operator must be evaluated
213  *  left to right, because the result indicates more than
214  *  equality.
215  *
216  *  see bitcpy for general word alignment information
217  */
218 static __inline__
bitcmp(const void * abase,bitsz_t aoff,const void * bbase,bitsz_t boff,bitsz_t sz)219 int bitcmp ( const void *abase, bitsz_t aoff, const void *bbase, bitsz_t boff, bitsz_t sz )
220 {
221     int diff = 0;
222 
223     if ( sz != 0 )
224     {
225         /* loop counter and left word count */
226         size_t i, lcountz;
227 
228         /* left & right masks and working registers */
229         WRD lmask, rmask, lreg, rreg;
230 
231         /* produce word-aligned pointers */
232 #if WRDSIZE == 8
233         /* 1-4. all at once */
234         const WRD *left = ( const WRD* ) abase + ( aoff >> WRDSHIFT );
235         const WRD *right = ( const WRD* ) bbase + ( boff >> WRDSHIFT );
236 #else
237         /* 1. capture word alignment adjustment */
238         size_t aadjust = ( size_t ) abase & ( WRDSIZE / 8 - 1 );
239         size_t badjust = ( size_t ) bbase & ( WRDSIZE / 8 - 1 );
240 
241         /* 2. create word-aligned pointers */
242         const WRD *left = ( const WRD* ) ( ( size_t ) abase - aadjust );
243         const WRD *right = ( const WRD* ) ( ( size_t ) bbase - badjust );
244 
245         /* 3. incorporate alignment adjustment into offset bits */
246         aoff += aadjust << 3;
247         boff += badjust << 3;
248 
249         /* 4. readjust pointers based upon offset */
250         left += aoff >> WRDSHIFT;
251         right += boff >> WRDSHIFT;
252 #endif
253         /* 5. restate offsets */
254         aoff &= ( WRDSIZE - 1 );
255         boff &= ( WRDSIZE - 1 );
256 
257         /* calculate number of words - 1 in left
258            since we know a-priori that "sz" > 0, we
259            know that the left and right counts must be
260            at least 1. our loops treat the last word
261            specially, so calculate a loop counter that
262            excludes the last word */
263         lcountz = ( aoff + sz + ( WRDSIZE - 1 ) - WRDSIZE ) >> WRDSHIFT;
264 
265         /* calculate masks */
266         lmask = rmask = ~ 0;
267         lmask >>= aoff;
268         rmask >>= ( aoff + sz ) & ( WRDSIZE - 1 );
269         if ( ( WRD ) ( rmask + 1 ) == 0 )
270             rmask = 0;
271 
272         /* significant bits from left [ 0 ] */
273         lreg = BSWAP ( left [ 0 ] ) & lmask;
274 
275         /* if source and destination are aligned */
276         if ( aoff == boff )
277         {
278             /* test against right bits through mask */
279             rreg = BSWAP ( right [ 0 ] ) & lmask;
280 
281             /* produce a difference of all but the last
282                aligned word, where initial word has been
283                left-masked. the last word is tested below. */
284             for ( i = 1; i <= lcountz; ++ i )
285             {
286                 diff = ( int ) lreg - ( int ) rreg;
287                 if ( diff != 0 )
288                     return diff;
289 
290                 /* byte-swapping occurs on little-endian architectures */
291                 lreg = BSWAP ( left [ i ] );
292                 rreg = BSWAP ( right [ i ] );
293             }
294 
295             /* fall out to end for masked comparison of last word */
296         }
297 
298         /* shifting alignment */
299         else
300         {
301             /* right count may differ from left count
302                since alignments differ, the span of "sz"
303                bits may hit a different number of words in
304                the left array than in the right. */
305             size_t rcountz = ( boff + sz + ( WRDSIZE - 1 ) - WRDSIZE ) >> WRDSHIFT;
306 
307             /* use double-word accumulator
308                note that the extra bits get ignored */
309             ACC acc = BSWAP ( right [ 0 ] );
310 
311             /* shift amount: positive if "b" needs to be right shifted.
312                NOTE - since the comparison must be successively performed
313                from left to right ( see above ), shifting is ALWAYS toward
314                right, making for special handling when "shift" < 0 ( see below ) */
315             int shift = ( int ) aoff - ( int ) boff;
316             if ( shift > 0 )
317             {
318                 /* initial word from right operand, aligned with left */
319                 rreg = ( WRD ) ( acc >> shift ) & lmask;
320 
321                 /* "shift" > 0 means "lcountz" >= "rcountz" */
322                 for ( acc <<= WRDSIZE, i = 1; i <= rcountz; acc <<= WRDSIZE, ++ i )
323                 {
324                     /* compare words at i-1 */
325                     diff = ( int ) lreg - ( int ) rreg;
326                     if ( diff != 0 )
327                         return diff;
328 
329                     /* accumulate next word from right operand */
330                     acc |= BSWAP ( right [ i ] );
331 
332                     /* bring in next word from left operand */
333                     lreg = BSWAP ( left [ i ] );
334 
335                     /* produce aligned word from right operand */
336                     rreg = ( WRD ) ( acc >> shift );
337                 }
338 
339                 /* if there is one more word in left */
340                 if ( lcountz > rcountz )
341                 {
342                     /* compare penultimate */
343                     diff = ( int ) lreg - ( int ) rreg;
344                     if ( diff != 0 )
345                         return diff;
346 
347                     /* get last word in left */
348                     lreg = BSWAP ( left [ lcountz ] );
349 
350                     /* last word from right is already in "acc" */
351                     rreg = ( WRD ) ( acc >> shift );
352                 }
353 
354                 /* fall out to end for masked comparison of last word */
355             }
356 
357             else
358             {
359                 /* since all shifts must be toward right ( due to left to right
360                    comparison ), this alignment will require a pre-fetch from
361                    right operand into accumulator, and adjusting the negative
362                    shift amount to a positive right-shift. */
363                 shift += WRDSIZE;
364 
365                 /* since "shift" was negative, we know "lcountz" <= "rcountz",
366                    so use "lcountz" as loop limit. pre-shift "acc" as loop init */
367                 for ( acc <<= WRDSIZE, i = 1; i <= lcountz; acc <<= WRDSIZE, ++ i )
368                 {
369                     /* accumulate next word from right operand */
370                     acc |= BSWAP ( right [ i ] );
371 
372                     /* produce aligned word from right operand */
373                     rreg = ( WRD ) ( acc >> shift ) & lmask;
374 
375                     /* now test against left */
376                     diff = ( int ) lreg - ( int ) rreg;
377                     if ( diff != 0 )
378                         return diff;
379 
380                     /* bring in next word from left operand */
381                     lreg = BSWAP ( left [ i ] );
382 
383                     /* no more left mask */
384                     lmask = ~ 0;
385                 }
386 
387                 /* if there is one more word in right */
388                 if ( lcountz < rcountz )
389                     acc |= BSWAP ( right [ rcountz ] );
390 
391                 /* produce "rreg" from "acc" */
392                 rreg = ( WRD ) ( acc >> shift ) & lmask;
393 
394                 /* fall out to end for masked comparison of last word */
395             }
396         }
397 
398         /* mask off unused bytes from right */
399         lreg &= ~ rmask;
400         rreg &= ~ rmask;
401 
402         /* perform final comparison */
403         diff = ( int ) lreg - ( int ) rreg;
404     }
405 
406     return diff;
407 }
408 
409 #ifdef __cplusplus
410 }
411 #endif
412 
413 #endif /* _h_noarch_bitstr_ */
414