1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #ifndef _h_noarch_bitstr_
28 #define _h_noarch_bitstr_
29
30 #ifndef _h_bitstr_
31 #error "don't include <noarch/bitstr.h> directly - use <bitstr.h>"
32 #endif
33
34 #ifndef _h_klib_defs_
35 #include <klib/defs.h>
36 #endif
37
38 #include <byteswap.h>
39
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43
44 #ifndef __inline__
45 #define __inline__ inline
46 #endif
47
48 /* bitcpy
49 * copy a string of bits from source to dest
50 *
51 * both source and dest may have non-byte aligned pointers
52 * the number of bits to copy need not be byte aligned
53 *
54 * depending upon architecture and OS conventions, the word
55 * size may be adjusted to 1, 2, or 4 bytes, where the base
56 * pointers are always word aligned.
57 *
58 * bits in memory are always treated as big-endian, meaning
59 * that on multi-byte fetches and stores, we perform byte-swapping
60 * if there are shifts or masks
61 */
62 static __inline__
bitcpy(void * dbase,bitsz_t doff,const void * sbase,bitsz_t soff,bitsz_t sz)63 void bitcpy ( void *dbase, bitsz_t doff, const void *sbase, bitsz_t soff, bitsz_t sz )
64 {
65 /* noop if sz == 0 */
66 if ( sz != 0 )
67 {
68 /* loop counter and destination word count */
69 size_t i, dcountz;
70
71 /* left & right masks and working register */
72 WRD lmask, rmask, reg;
73
74 /* produce word-aligned pointers */
75 #if WRDSIZE == 8
76 /* 1-4. all at once */
77 WRD *dst = ( WRD* ) dbase + ( doff >> WRDSHIFT );
78 const WRD *src = ( const WRD* ) sbase + ( soff >> WRDSHIFT );
79 #else
80 /* 1. capture word alignment adjustment */
81 size_t dadjust = ( size_t ) dbase & ( WRDSIZE / 8 - 1 );
82 size_t sadjust = ( size_t ) sbase & ( WRDSIZE / 8 - 1 );
83
84 /* 2. create word-aligned pointers */
85 WRD *dst = ( WRD* ) ( ( size_t ) dbase - dadjust );
86 const WRD *src = ( const WRD* ) ( ( size_t ) sbase - sadjust );
87
88 /* 3. incorporate alignment adjustment into offset bits */
89 doff += dadjust << 3;
90 soff += sadjust << 3;
91
92 /* 4. readjust pointers based upon offset */
93 dst += doff >> WRDSHIFT;
94 src += soff >> WRDSHIFT;
95 #endif
96 /* 5. restate offsets */
97 doff &= ( WRDSIZE - 1 );
98 soff &= ( WRDSIZE - 1 );
99
100 /* calculate number of words - 1 in dst */
101 dcountz = ( doff + sz + ( WRDSIZE - 1 ) - WRDSIZE ) >> WRDSHIFT;
102
103 /* calculate masks */
104 lmask = rmask = ~ 0;
105 lmask >>= doff;
106 rmask >>= ( doff + sz ) & ( WRDSIZE - 1 );
107 if ( ( WRD ) ( rmask + 1 ) == 0 )
108 rmask = 0;
109
110 /* prime register with masked dst [ 0 ] */
111 reg = BSWAP ( dst [ 0 ] ) & ~ lmask;
112
113 /* if source and destination are aligned */
114 if ( doff == soff )
115 {
116 /* merge src [ 0 ] into reg through mask */
117 reg |= BSWAP ( src [ 0 ] ) & lmask;
118
119 #if WRDSIZE > 8
120 /* straight copies don't need byteswap
121 other than on first and last words
122 put first word back into little-endian
123 for remainder of loop */
124 if ( dcountz > 0 )
125 {
126 reg = BSWAP ( reg );
127 #endif
128 /* aligned buffers have n:n word ratio */
129 for ( i = 0; i < dcountz; )
130 {
131 dst [ i ] = reg;
132 reg = src [ ++ i ];
133 }
134
135 #if WRDSIZE > 8
136 /* revert to big-endian */
137 reg = BSWAP ( reg );
138 }
139 #endif
140 }
141
142 /* shifting alignment */
143 else
144 {
145 /* source count may differ from dest count */
146 size_t scountz = ( soff + sz + ( WRDSIZE - 1 ) - WRDSIZE ) >> WRDSHIFT;
147
148 /* use double-word accumulator */
149 ACC acc = BSWAP ( src [ 0 ] );
150
151 /* shift amount */
152 int shift = ( int ) doff - ( int ) soff;
153 if ( shift > 0 )
154 {
155 /* take only valid bits in shifted initial src */
156 reg |= ( WRD ) ( acc >> shift ) & lmask;
157
158 /* because "shift" > 0, we know "dcountz" >= "scountz" */
159 for ( acc <<= WRDSIZE, i = 0; i < scountz; acc <<= WRDSIZE )
160 {
161 dst [ i ] = BSWAP ( reg );
162 ++ i;
163 acc |= BSWAP ( src [ i ] );
164 reg = ( WRD ) ( acc >> shift );
165 }
166
167 /* if "dcountz" > "scountz" */
168 if ( i < dcountz )
169 {
170 dst [ i ] = BSWAP ( reg );
171 reg = ( WRD ) ( acc >> shift );
172 }
173 }
174
175 else
176 {
177 /* need single word read-ahead and right-shift */
178 shift += WRDSIZE;
179
180 /* because "shift" was < 0, we know "dcountz" <= "scountz" */
181 for ( acc <<= WRDSIZE, i = 0; i < dcountz; acc <<= WRDSIZE )
182 {
183 acc |= BSWAP ( src [ i + 1 ] );
184 reg |= ( WRD ) ( acc >> shift ) & lmask;
185 dst [ i ++ ] = BSWAP ( reg );
186 lmask = ~ 0;
187 reg = 0;
188 }
189
190 /* if "dcountz" < "scountz" */
191 if ( i < scountz )
192 acc |= BSWAP ( src [ scountz ] );
193
194 reg |= ( WRD ) ( acc >> shift ) & lmask;
195 }
196 }
197
198 /* mask off unused bytes from src */
199 reg &= ~ rmask;
200
201 /* bring in saved bits from dst */
202 reg |= BSWAP ( dst [ dcountz ] ) & rmask;
203
204 /* write out last word */
205 dst [ dcountz ] = BSWAP ( reg );
206 }
207 }
208
209 /* bitcmp
210 * performs bitwise a - b, returning result as int
211 * result value has no meaning, only sign
212 * where < 0 means a < b, > 0 means a > b, and 0 means a == b
213 *
214 * since the comparison produces a tri-state indicator of
215 * relative magnitude, the order of "a" and "b" is important.
216 * furthermore, the difference operator must be evaluated
217 * left to right, because the result indicates more than
218 * equality.
219 *
220 * see bitcpy for general word alignment information
221 */
222 static __inline__
bitcmp(const void * abase,bitsz_t aoff,const void * bbase,bitsz_t boff,bitsz_t sz)223 int bitcmp ( const void *abase, bitsz_t aoff, const void *bbase, bitsz_t boff, bitsz_t sz )
224 {
225 int diff = 0;
226
227 if ( sz != 0 )
228 {
229 /* loop counter and left word count */
230 size_t i, lcountz;
231
232 /* left & right masks and working registers */
233 WRD lmask, rmask, lreg, rreg;
234
235 /* produce word-aligned pointers */
236 #if WRDSIZE == 8
237 /* 1-4. all at once */
238 const WRD *left = ( const WRD* ) abase + ( aoff >> WRDSHIFT );
239 const WRD *right = ( const WRD* ) bbase + ( boff >> WRDSHIFT );
240 #else
241 /* 1. capture word alignment adjustment */
242 size_t aadjust = ( size_t ) abase & ( WRDSIZE / 8 - 1 );
243 size_t badjust = ( size_t ) bbase & ( WRDSIZE / 8 - 1 );
244
245 /* 2. create word-aligned pointers */
246 const WRD *left = ( const WRD* ) ( ( size_t ) abase - aadjust );
247 const WRD *right = ( const WRD* ) ( ( size_t ) bbase - badjust );
248
249 /* 3. incorporate alignment adjustment into offset bits */
250 aoff += aadjust << 3;
251 boff += badjust << 3;
252
253 /* 4. readjust pointers based upon offset */
254 left += aoff >> WRDSHIFT;
255 right += boff >> WRDSHIFT;
256 #endif
257 /* 5. restate offsets */
258 aoff &= ( WRDSIZE - 1 );
259 boff &= ( WRDSIZE - 1 );
260
261 /* calculate number of words - 1 in left
262 since we know a-priori that "sz" > 0, we
263 know that the left and right counts must be
264 at least 1. our loops treat the last word
265 specially, so calculate a loop counter that
266 excludes the last word */
267 lcountz = ( aoff + sz + ( WRDSIZE - 1 ) - WRDSIZE ) >> WRDSHIFT;
268
269 /* calculate masks */
270 lmask = rmask = ~ 0;
271 lmask >>= aoff;
272 rmask >>= ( aoff + sz ) & ( WRDSIZE - 1 );
273 if ( ( WRD ) ( rmask + 1 ) == 0 )
274 rmask = 0;
275
276 /* significant bits from left [ 0 ] */
277 lreg = BSWAP ( left [ 0 ] ) & lmask;
278
279 /* if source and destination are aligned */
280 if ( aoff == boff )
281 {
282 /* test against right bits through mask */
283 rreg = BSWAP ( right [ 0 ] ) & lmask;
284
285 /* produce a difference of all but the last
286 aligned word, where initial word has been
287 left-masked. the last word is tested below. */
288 for ( i = 1; i <= lcountz; ++ i )
289 {
290 diff = ( int ) lreg - ( int ) rreg;
291 if ( diff != 0 )
292 return diff;
293
294 /* byte-swapping occurs on little-endian architectures */
295 lreg = BSWAP ( left [ i ] );
296 rreg = BSWAP ( right [ i ] );
297 }
298
299 /* fall out to end for masked comparison of last word */
300 }
301
302 /* shifting alignment */
303 else
304 {
305 /* right count may differ from left count
306 since alignments differ, the span of "sz"
307 bits may hit a different number of words in
308 the left array than in the right. */
309 size_t rcountz = ( boff + sz + ( WRDSIZE - 1 ) - WRDSIZE ) >> WRDSHIFT;
310
311 /* use double-word accumulator
312 note that the extra bits get ignored */
313 ACC acc = BSWAP ( right [ 0 ] );
314
315 /* shift amount: positive if "b" needs to be right shifted.
316 NOTE - since the comparison must be successively performed
317 from left to right ( see above ), shifting is ALWAYS toward
318 right, making for special handling when "shift" < 0 ( see below ) */
319 int shift = ( int ) aoff - ( int ) boff;
320 if ( shift > 0 )
321 {
322 /* initial word from right operand, aligned with left */
323 rreg = ( WRD ) ( acc >> shift ) & lmask;
324
325 /* "shift" > 0 means "lcountz" >= "rcountz" */
326 for ( acc <<= WRDSIZE, i = 1; i <= rcountz; acc <<= WRDSIZE, ++ i )
327 {
328 /* compare words at i-1 */
329 diff = ( int ) lreg - ( int ) rreg;
330 if ( diff != 0 )
331 return diff;
332
333 /* accumulate next word from right operand */
334 acc |= BSWAP ( right [ i ] );
335
336 /* bring in next word from left operand */
337 lreg = BSWAP ( left [ i ] );
338
339 /* produce aligned word from right operand */
340 rreg = ( WRD ) ( acc >> shift );
341 }
342
343 /* if there is one more word in left */
344 if ( lcountz > rcountz )
345 {
346 /* compare penultimate */
347 diff = ( int ) lreg - ( int ) rreg;
348 if ( diff != 0 )
349 return diff;
350
351 /* get last word in left */
352 lreg = BSWAP ( left [ lcountz ] );
353
354 /* last word from right is already in "acc" */
355 rreg = ( WRD ) ( acc >> shift );
356 }
357
358 /* fall out to end for masked comparison of last word */
359 }
360
361 else
362 {
363 /* since all shifts must be toward right ( due to left to right
364 comparison ), this alignment will require a pre-fetch from
365 right operand into accumulator, and adjusting the negative
366 shift amount to a positive right-shift. */
367 shift += WRDSIZE;
368
369 /* since "shift" was negative, we know "lcountz" <= "rcountz",
370 so use "lcountz" as loop limit. pre-shift "acc" as loop init */
371 for ( acc <<= WRDSIZE, i = 1; i <= lcountz; acc <<= WRDSIZE, ++ i )
372 {
373 /* accumulate next word from right operand */
374 acc |= BSWAP ( right [ i ] );
375
376 /* produce aligned word from right operand */
377 rreg = ( WRD ) ( acc >> shift ) & lmask;
378
379 /* now test against left */
380 diff = ( int ) lreg - ( int ) rreg;
381 if ( diff != 0 )
382 return diff;
383
384 /* bring in next word from left operand */
385 lreg = BSWAP ( left [ i ] );
386
387 /* no more left mask */
388 lmask = ~ 0;
389 }
390
391 /* if there is one more word in right */
392 if ( lcountz < rcountz )
393 acc |= BSWAP ( right [ rcountz ] );
394
395 /* produce "rreg" from "acc" */
396 rreg = ( WRD ) ( acc >> shift ) & lmask;
397
398 /* fall out to end for masked comparison of last word */
399 }
400 }
401
402 /* mask off unused bytes from right */
403 lreg &= ~ rmask;
404 rreg &= ~ rmask;
405
406 /* perform final comparison */
407 diff = ( int ) lreg - ( int ) rreg;
408 }
409
410 return diff;
411 }
412
413 #ifdef __cplusplus
414 }
415 #endif
416
417 #endif /* _h_noarch_bitstr_ */
418