1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #ifndef _h_noarch_bitstr_
28 #define _h_noarch_bitstr_
29 
30 #ifndef _h_bitstr_
31 #error "don't include <noarch/bitstr.h> directly - use <bitstr.h>"
32 #endif
33 
34 #ifndef _h_klib_defs_
35 #include <klib/defs.h>
36 #endif
37 
38 #include <byteswap.h>
39 
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43 
44 #ifndef __inline__
45 #define __inline__ inline
46 #endif
47 
48 /* bitcpy
49  *  copy a string of bits from source to dest
50  *
51  *  both source and dest may have non-byte aligned pointers
52  *  the number of bits to copy need not be byte aligned
53  *
54  *  depending upon architecture and OS conventions, the word
55  *  size may be adjusted to 1, 2, or 4 bytes, where the base
56  *  pointers are always word aligned.
57  *
58  *  bits in memory are always treated as big-endian, meaning
59  *  that on multi-byte fetches and stores, we perform byte-swapping
60  *  if there are shifts or masks
61  */
62 static __inline__
bitcpy(void * dbase,bitsz_t doff,const void * sbase,bitsz_t soff,bitsz_t sz)63 void bitcpy ( void *dbase, bitsz_t doff, const void *sbase, bitsz_t soff, bitsz_t sz )
64 {
65     /* noop if sz == 0 */
66     if ( sz != 0 )
67     {
68         /* loop counter and destination word count */
69         size_t i, dcountz;
70 
71         /* left & right masks and working register */
72         WRD lmask, rmask, reg;
73 
74         /* produce word-aligned pointers */
75 #if WRDSIZE == 8
76         /* 1-4. all at once */
77         WRD *dst = ( WRD* ) dbase + ( doff >> WRDSHIFT );
78         const WRD *src = ( const WRD* ) sbase + ( soff >> WRDSHIFT );
79 #else
80         /* 1. capture word alignment adjustment */
81         size_t dadjust = ( size_t ) dbase & ( WRDSIZE / 8 - 1 );
82         size_t sadjust = ( size_t ) sbase & ( WRDSIZE / 8 - 1 );
83 
84         /* 2. create word-aligned pointers */
85         WRD *dst = ( WRD* ) ( ( size_t ) dbase - dadjust );
86         const WRD *src = ( const WRD* ) ( ( size_t ) sbase - sadjust );
87 
88         /* 3. incorporate alignment adjustment into offset bits */
89         doff += dadjust << 3;
90         soff += sadjust << 3;
91 
92         /* 4. readjust pointers based upon offset */
93         dst += doff >> WRDSHIFT;
94         src += soff >> WRDSHIFT;
95 #endif
96         /* 5. restate offsets */
97         doff &= ( WRDSIZE - 1 );
98         soff &= ( WRDSIZE - 1 );
99 
100         /* calculate number of words - 1 in dst */
101         dcountz = ( doff + sz + ( WRDSIZE - 1 ) - WRDSIZE ) >> WRDSHIFT;
102 
103         /* calculate masks */
104         lmask = rmask = ~ 0;
105         lmask >>= doff;
106         rmask >>= ( doff + sz ) & ( WRDSIZE - 1 );
107         if ( ( WRD ) ( rmask + 1 ) == 0 )
108             rmask = 0;
109 
110         /* prime register with masked dst [ 0 ] */
111         reg = BSWAP ( dst [ 0 ] ) & ~ lmask;
112 
113         /* if source and destination are aligned */
114         if ( doff == soff )
115         {
116             /* merge src [ 0 ] into reg through mask */
117             reg |= BSWAP ( src [ 0 ] ) & lmask;
118 
119 #if WRDSIZE > 8
120             /* straight copies don't need byteswap
121                other than on first and last words
122                put first word back into little-endian
123                for remainder of loop */
124             if ( dcountz > 0 )
125             {
126                 reg = BSWAP ( reg );
127 #endif
128                 /* aligned buffers have n:n word ratio */
129                 for ( i = 0; i < dcountz; )
130                 {
131                     dst [ i ] = reg;
132                     reg = src [ ++ i ];
133                 }
134 
135 #if WRDSIZE > 8
136                 /* revert to big-endian */
137                 reg = BSWAP ( reg );
138             }
139 #endif
140         }
141 
142         /* shifting alignment  */
143         else
144         {
145             /* source count may differ from dest count */
146             size_t scountz = ( soff + sz + ( WRDSIZE - 1 ) - WRDSIZE ) >> WRDSHIFT;
147 
148             /* use double-word accumulator */
149             ACC acc = BSWAP ( src [ 0 ] );
150 
151             /* shift amount */
152             int shift = ( int ) doff - ( int ) soff;
153             if ( shift > 0 )
154             {
155                 /* take only valid bits in shifted initial src */
156                 reg |= ( WRD ) ( acc >> shift ) & lmask;
157 
158                 /* because "shift" > 0, we know "dcountz" >= "scountz" */
159                 for ( acc <<= WRDSIZE, i = 0; i < scountz; acc <<= WRDSIZE )
160                 {
161                     dst [ i ] = BSWAP ( reg );
162                     ++ i;
163                     acc |= BSWAP ( src [ i ] );
164                     reg = ( WRD ) ( acc >> shift );
165                 }
166 
167                 /* if "dcountz" > "scountz" */
168                 if ( i < dcountz )
169                 {
170                     dst [ i ] = BSWAP ( reg );
171                     reg = ( WRD ) ( acc >> shift );
172                 }
173             }
174 
175             else
176             {
177                 /* need single word read-ahead and right-shift */
178                 shift += WRDSIZE;
179 
180                 /* because "shift" was < 0, we know "dcountz" <= "scountz" */
181                 for ( acc <<= WRDSIZE, i = 0; i < dcountz; acc <<= WRDSIZE )
182                 {
183                     acc |= BSWAP ( src [ i + 1 ] );
184                     reg |= ( WRD ) ( acc >> shift ) & lmask;
185                     dst [ i ++ ] = BSWAP ( reg );
186                     lmask = ~ 0;
187                     reg = 0;
188                 }
189 
190                 /* if "dcountz" < "scountz" */
191                 if ( i < scountz )
192                     acc |= BSWAP ( src [ scountz ] );
193 
194                 reg |= ( WRD ) ( acc >> shift ) & lmask;
195             }
196         }
197 
198         /* mask off unused bytes from src */
199         reg &= ~ rmask;
200 
201         /* bring in saved bits from dst */
202         reg |= BSWAP ( dst [ dcountz ] ) & rmask;
203 
204         /* write out last word */
205         dst [ dcountz ] = BSWAP ( reg );
206     }
207 }
208 
209 /* bitcmp
210  *  performs bitwise a - b, returning result as int
211  *  result value has no meaning, only sign
212  *  where < 0 means a < b, > 0 means a > b, and 0 means a == b
213  *
214  *  since the comparison produces a tri-state indicator of
215  *  relative magnitude, the order of "a" and "b" is important.
216  *  furthermore, the difference operator must be evaluated
217  *  left to right, because the result indicates more than
218  *  equality.
219  *
220  *  see bitcpy for general word alignment information
221  */
222 static __inline__
bitcmp(const void * abase,bitsz_t aoff,const void * bbase,bitsz_t boff,bitsz_t sz)223 int bitcmp ( const void *abase, bitsz_t aoff, const void *bbase, bitsz_t boff, bitsz_t sz )
224 {
225     int diff = 0;
226 
227     if ( sz != 0 )
228     {
229         /* loop counter and left word count */
230         size_t i, lcountz;
231 
232         /* left & right masks and working registers */
233         WRD lmask, rmask, lreg, rreg;
234 
235         /* produce word-aligned pointers */
236 #if WRDSIZE == 8
237         /* 1-4. all at once */
238         const WRD *left = ( const WRD* ) abase + ( aoff >> WRDSHIFT );
239         const WRD *right = ( const WRD* ) bbase + ( boff >> WRDSHIFT );
240 #else
241         /* 1. capture word alignment adjustment */
242         size_t aadjust = ( size_t ) abase & ( WRDSIZE / 8 - 1 );
243         size_t badjust = ( size_t ) bbase & ( WRDSIZE / 8 - 1 );
244 
245         /* 2. create word-aligned pointers */
246         const WRD *left = ( const WRD* ) ( ( size_t ) abase - aadjust );
247         const WRD *right = ( const WRD* ) ( ( size_t ) bbase - badjust );
248 
249         /* 3. incorporate alignment adjustment into offset bits */
250         aoff += aadjust << 3;
251         boff += badjust << 3;
252 
253         /* 4. readjust pointers based upon offset */
254         left += aoff >> WRDSHIFT;
255         right += boff >> WRDSHIFT;
256 #endif
257         /* 5. restate offsets */
258         aoff &= ( WRDSIZE - 1 );
259         boff &= ( WRDSIZE - 1 );
260 
261         /* calculate number of words - 1 in left
262            since we know a-priori that "sz" > 0, we
263            know that the left and right counts must be
264            at least 1. our loops treat the last word
265            specially, so calculate a loop counter that
266            excludes the last word */
267         lcountz = ( aoff + sz + ( WRDSIZE - 1 ) - WRDSIZE ) >> WRDSHIFT;
268 
269         /* calculate masks */
270         lmask = rmask = ~ 0;
271         lmask >>= aoff;
272         rmask >>= ( aoff + sz ) & ( WRDSIZE - 1 );
273         if ( ( WRD ) ( rmask + 1 ) == 0 )
274             rmask = 0;
275 
276         /* significant bits from left [ 0 ] */
277         lreg = BSWAP ( left [ 0 ] ) & lmask;
278 
279         /* if source and destination are aligned */
280         if ( aoff == boff )
281         {
282             /* test against right bits through mask */
283             rreg = BSWAP ( right [ 0 ] ) & lmask;
284 
285             /* produce a difference of all but the last
286                aligned word, where initial word has been
287                left-masked. the last word is tested below. */
288             for ( i = 1; i <= lcountz; ++ i )
289             {
290                 diff = ( int ) lreg - ( int ) rreg;
291                 if ( diff != 0 )
292                     return diff;
293 
294                 /* byte-swapping occurs on little-endian architectures */
295                 lreg = BSWAP ( left [ i ] );
296                 rreg = BSWAP ( right [ i ] );
297             }
298 
299             /* fall out to end for masked comparison of last word */
300         }
301 
302         /* shifting alignment */
303         else
304         {
305             /* right count may differ from left count
306                since alignments differ, the span of "sz"
307                bits may hit a different number of words in
308                the left array than in the right. */
309             size_t rcountz = ( boff + sz + ( WRDSIZE - 1 ) - WRDSIZE ) >> WRDSHIFT;
310 
311             /* use double-word accumulator
312                note that the extra bits get ignored */
313             ACC acc = BSWAP ( right [ 0 ] );
314 
315             /* shift amount: positive if "b" needs to be right shifted.
316                NOTE - since the comparison must be successively performed
317                from left to right ( see above ), shifting is ALWAYS toward
318                right, making for special handling when "shift" < 0 ( see below ) */
319             int shift = ( int ) aoff - ( int ) boff;
320             if ( shift > 0 )
321             {
322                 /* initial word from right operand, aligned with left */
323                 rreg = ( WRD ) ( acc >> shift ) & lmask;
324 
325                 /* "shift" > 0 means "lcountz" >= "rcountz" */
326                 for ( acc <<= WRDSIZE, i = 1; i <= rcountz; acc <<= WRDSIZE, ++ i )
327                 {
328                     /* compare words at i-1 */
329                     diff = ( int ) lreg - ( int ) rreg;
330                     if ( diff != 0 )
331                         return diff;
332 
333                     /* accumulate next word from right operand */
334                     acc |= BSWAP ( right [ i ] );
335 
336                     /* bring in next word from left operand */
337                     lreg = BSWAP ( left [ i ] );
338 
339                     /* produce aligned word from right operand */
340                     rreg = ( WRD ) ( acc >> shift );
341                 }
342 
343                 /* if there is one more word in left */
344                 if ( lcountz > rcountz )
345                 {
346                     /* compare penultimate */
347                     diff = ( int ) lreg - ( int ) rreg;
348                     if ( diff != 0 )
349                         return diff;
350 
351                     /* get last word in left */
352                     lreg = BSWAP ( left [ lcountz ] );
353 
354                     /* last word from right is already in "acc" */
355                     rreg = ( WRD ) ( acc >> shift );
356                 }
357 
358                 /* fall out to end for masked comparison of last word */
359             }
360 
361             else
362             {
363                 /* since all shifts must be toward right ( due to left to right
364                    comparison ), this alignment will require a pre-fetch from
365                    right operand into accumulator, and adjusting the negative
366                    shift amount to a positive right-shift. */
367                 shift += WRDSIZE;
368 
369                 /* since "shift" was negative, we know "lcountz" <= "rcountz",
370                    so use "lcountz" as loop limit. pre-shift "acc" as loop init */
371                 for ( acc <<= WRDSIZE, i = 1; i <= lcountz; acc <<= WRDSIZE, ++ i )
372                 {
373                     /* accumulate next word from right operand */
374                     acc |= BSWAP ( right [ i ] );
375 
376                     /* produce aligned word from right operand */
377                     rreg = ( WRD ) ( acc >> shift ) & lmask;
378 
379                     /* now test against left */
380                     diff = ( int ) lreg - ( int ) rreg;
381                     if ( diff != 0 )
382                         return diff;
383 
384                     /* bring in next word from left operand */
385                     lreg = BSWAP ( left [ i ] );
386 
387                     /* no more left mask */
388                     lmask = ~ 0;
389                 }
390 
391                 /* if there is one more word in right */
392                 if ( lcountz < rcountz )
393                     acc |= BSWAP ( right [ rcountz ] );
394 
395                 /* produce "rreg" from "acc" */
396                 rreg = ( WRD ) ( acc >> shift ) & lmask;
397 
398                 /* fall out to end for masked comparison of last word */
399             }
400         }
401 
402         /* mask off unused bytes from right */
403         lreg &= ~ rmask;
404         rreg &= ~ rmask;
405 
406         /* perform final comparison */
407         diff = ( int ) lreg - ( int ) rreg;
408     }
409 
410     return diff;
411 }
412 
413 #ifdef __cplusplus
414 }
415 #endif
416 
417 #endif /* _h_noarch_bitstr_ */
418