1 /****************************************************************************
2 **
3 ** This file is part of GAP, a system for computational discrete algebra.
4 **
5 ** Copyright of GAP belongs to its developers, whose names are too numerous
6 ** to list here. Please refer to the COPYRIGHT file for details.
7 **
8 ** SPDX-License-Identifier: GPL-2.0-or-later
9 **
10 ** This file declares the functions that mainly operate on boolean lists.
11 ** Because boolean lists are just a special case of lists many things are
12 ** done in the list package.
13 **
14 ** A *boolean list* is a list that has no holes and contains only 'true' and
15 ** 'false'. For the full definition of boolean list see chapter "Boolean
16 ** Lists" in the {\GAP} Manual. Read also the section "More about Boolean
17 ** Lists" about the different internal representations of such lists.
18 */
19
20 #ifndef GAP_BLISTER_H
21 #define GAP_BLISTER_H
22
23 #include "bool.h"
24 #include "objects.h"
25
26 /****************************************************************************
27 **
28 *F IS_BLIST_REP( <list> ) . . . . . check if <list> is in boolean list rep
29 */
IS_BLIST_REP(Obj list)30 EXPORT_INLINE Int IS_BLIST_REP(Obj list)
31 {
32 return T_BLIST <= TNUM_OBJ(list) &&
33 TNUM_OBJ(list) <= T_BLIST_SSORT + IMMUTABLE;
34 }
35
36
37 /****************************************************************************
38 **
39 *F SIZE_PLEN_BLIST( <plen> ) . . size for a blist with given physical length
40 **
41 ** 'SIZE_PLEN_BLIST' returns the size that a boolean list with room for
42 ** <plen> elements must at least have.
43 **
44 */
SIZE_PLEN_BLIST(Int plen)45 EXPORT_INLINE Int SIZE_PLEN_BLIST(Int plen)
46 {
47 GAP_ASSERT(plen >= 0);
48 return sizeof(Obj) + (plen + BIPEB - 1) / BIPEB * sizeof(UInt);
49 }
50
51 /****************************************************************************
52 **
53 *F LEN_BLIST( <list> ) . . . . . . . . . . . . . . length of a boolean list
54 **
55 ** 'LEN_BLIST' returns the logical length of the boolean list <list>, as a C
56 ** integer.
57 **
58 */
LEN_BLIST(Obj list)59 EXPORT_INLINE Int LEN_BLIST(Obj list)
60 {
61 GAP_ASSERT(IS_BLIST_REP(list));
62 return INT_INTOBJ(CONST_ADDR_OBJ(list)[0]);
63 }
64
65
66 /***************************************************************************
67 **
68 *F NUMBER_BLOCKS_BLIST(<list>) . . . . . . . . number of UInt blocks in list
69 **
70 */
NUMBER_BLOCKS_BLIST(Obj blist)71 EXPORT_INLINE Int NUMBER_BLOCKS_BLIST(Obj blist)
72 {
73 GAP_ASSERT(IS_BLIST_REP(blist));
74 return (LEN_BLIST(blist) + BIPEB - 1) / BIPEB;
75 }
76
77
78 /****************************************************************************
79 **
80 *F SET_LEN_BLIST( <list>, <len> ) . . . . set the length of a boolean list
81 **
82 ** 'SET_LEN_BLIST' sets the length of the boolean list <list> to the value
83 ** <len>, which must be a positive C integer.
84 **
85 */
SET_LEN_BLIST(Obj list,Int len)86 EXPORT_INLINE void SET_LEN_BLIST(Obj list, Int len)
87 {
88 GAP_ASSERT(IS_BLIST_REP(list));
89 GAP_ASSERT(len >= 0);
90 ADDR_OBJ(list)[0] = INTOBJ_INT(len);
91 }
92
93
94 /****************************************************************************
95 **
96 *F BLOCKS_BLIST( <list> ) . . . . . . . . . . first block of a boolean list
97 **
98 ** returns a pointer to the start of the data of the Boolean list
99 **
100 */
BLOCKS_BLIST(Obj list)101 EXPORT_INLINE UInt * BLOCKS_BLIST(Obj list)
102 {
103 return ((UInt *)(ADDR_OBJ(list) + 1));
104 }
105
CONST_BLOCKS_BLIST(Obj list)106 EXPORT_INLINE const UInt * CONST_BLOCKS_BLIST(Obj list)
107 {
108 return ((const UInt *)(CONST_ADDR_OBJ(list) + 1));
109 }
110
111 /****************************************************************************
112 **
113 *F BLOCK_ELM_BLIST_PTR( <list>, <pos> ) . . ptr to a block of a boolean list
114 **
115 ** 'BLOCK_ELM_BLIST_PTR' return a pointer to the block containing the
116 ** <pos>-th element of the boolean list <list>. <pos> must be a positive
117 ** integer less than or equal to the length of <list>.
118 */
BLOCK_ELM_BLIST_PTR(Obj list,UInt pos)119 EXPORT_INLINE UInt * BLOCK_ELM_BLIST_PTR(Obj list, UInt pos)
120 {
121 return BLOCKS_BLIST(list) + ((pos)-1) / BIPEB;
122 }
123
CONST_BLOCK_ELM_BLIST_PTR(Obj list,UInt pos)124 EXPORT_INLINE const UInt * CONST_BLOCK_ELM_BLIST_PTR(Obj list, UInt pos)
125 {
126 return CONST_BLOCKS_BLIST(list) + ((pos)-1) / BIPEB;
127 }
128
129 /****************************************************************************
130 **
131 *F MASK_POS_BLIST( <pos> ) . . . . bit mask for position of a Boolean list
132 **
133 ** 'MASK_POS_BLIST(<pos>)' returns a UInt with a single set bit in position
134 ** '(<pos>-1) % BIPEB',
135 ** useful for accessing the <pos>-th element of a blist.
136 */
MASK_POS_BLIST(UInt pos)137 EXPORT_INLINE UInt MASK_POS_BLIST(UInt pos)
138 {
139 return ((UInt)1) << (pos - 1) % BIPEB;
140 }
141
142 /****************************************************************************
143 **
144 *F TEST_BIT_BLIST( <list>, <pos> ) . . . . . . test a bit of a boolean list
145 **
146 ** 'TEST_BIT_BLIST' return a non-zero value if the <pos>-th element of the
147 ** boolean list <list> is 1, and otherwise 0. <pos> must be a positive
148 ** integer less than or equal to the length of <list>.
149 */
TEST_BIT_BLIST(Obj list,UInt pos)150 EXPORT_INLINE Int TEST_BIT_BLIST(Obj list, UInt pos)
151 {
152 return *CONST_BLOCK_ELM_BLIST_PTR(list, pos) & MASK_POS_BLIST(pos);
153 }
154
155 /****************************************************************************
156 **
157 *F ELM_BLIST( <list>, <pos> ) . . . . . . . . . . element of a boolean list
158 **
159 ** 'ELM_BLIST' return the <pos>-th bit of the boolean list <list>, which is
160 ** either 'true' or 'false'. <pos> must be a positive integer less than or
161 ** equal to the length of <list>.
162 */
ELM_BLIST(Obj list,UInt pos)163 EXPORT_INLINE Obj ELM_BLIST(Obj list, UInt pos)
164 {
165 return TEST_BIT_BLIST(list, pos) ? True : False;
166 }
167
168 /****************************************************************************
169 **
170 *F SET_BIT_BLIST( <list>, <pos> ) . . . . . . . set a bit of a boolean list
171 *F CLEAR_BIT_BLIST( <list>, <pos> ) . . . . . clears a bit of a boolean list
172 **
173 ** These function set the bit at position <pos> in the boolean list <list>
174 ** to 1 resp. 0. <pos> must be a positive integer less than or equal to
175 ** the length of <list>.
176 */
SET_BIT_BLIST(Obj list,UInt pos)177 EXPORT_INLINE void SET_BIT_BLIST(Obj list, UInt pos)
178 {
179 *BLOCK_ELM_BLIST_PTR(list, pos) |= MASK_POS_BLIST(pos);
180 }
181
CLEAR_BIT_BLIST(Obj list,UInt pos)182 EXPORT_INLINE void CLEAR_BIT_BLIST(Obj list, UInt pos)
183 {
184 *BLOCK_ELM_BLIST_PTR(list, pos) &= ~MASK_POS_BLIST(pos);
185 }
186
187
188 /****************************************************************************
189 **
190 *F * * * * * * * * * * * * * * list functions * * * * * * * * * * * * * * * *
191 */
192
193 /****************************************************************************
194 **
195 *F AssBlist( <list>, <pos>, <val> ) . . . . . . . assign to a boolean list
196 **
197 ** 'AssBlist' assigns the value <val> to the boolean list <list> at the
198 ** position <pos>. It is the responsibility of the caller to ensure that
199 ** <pos> is positive, and that <val> is not 0.
200 **
201 ** 'AssBlist' is the function in 'AssListFuncs' for boolean lists.
202 **
203 ** If <pos> is less than or equal to the logical length of the boolean list
204 ** and <val> is 'true' or 'false' the assignment is done by setting the
205 ** corresponding bit. If <pos> is one more than the logical length of the
206 ** boolean list the assignment is done by resizing the boolean list if
207 ** necessary, setting the corresponding bit and incrementing the logical
208 ** length by one. Otherwise the boolean list is converted to an ordinary
209 ** list and the assignment is performed the ordinary way.
210 */
211 void AssBlist(Obj list, Int pos, Obj val);
212
213
214 /****************************************************************************
215 **
216 *F ConvBlist( <list> ) . . . . . . . . . convert a list into a boolean list
217 **
218 ** 'ConvBlist' changes the representation of boolean lists into the compact
219 ** representation of type 'T_BLIST' described above.
220 */
221 void ConvBlist(Obj list);
222
223
224 /****************************************************************************
225 **
226 *F COUNT_TRUES_BLOCK( <block> ) . . . . . . . . . . . count number of trues
227 **
228 ** 'COUNT_TRUES_BLOCK( <block> )' returns the number of 1 bits in the
229 ** UInt <block>. Two implementations are included below. One uses the
230 ** gcc builtin __builtin_popcount which usually generates the popcntl
231 ** or popcntq instruction on sufficiently recent CPUs. The other uses
232 ** the algorithm described in the original comment below:
233 **
234 ** The sequence to compute the number of bits in a block is quite clever.
235 ** The idea is that after the <i>-th instruction each subblock of $2^i$ bits
236 ** holds the number of bits of this subblock in the original block <m>.
237 ** This is illustrated in the example below for a block of with 8 bits:
238 **
239 ** // a b c d e f g h
240 ** m = (m & 0x55) + ((m >> 1) & 0x55);
241 ** // . b . d . f . h + . a . c . e . g = a+b c+d e+f g+h
242 ** m = (m & 0x33) + ((m >> 2) & 0x33);
243 ** // . . c+d . . g+h + . . a+b . . e+f = a+b+c+d e+f+g+h
244 ** m = (m & 0x0f) + ((m >> 4) & 0x0f);
245 ** // . . . . e+f+g+h + . . . . a+b+c+d = a+b+c+d+e+f+g+h
246 **
247 ** In the actual code some unnecessary mask have been removed, improving
248 ** performance quite a bit, because masks are 32 bit immediate values for
249 ** which most RISC processors need two instructions to load them. Talking
250 ** about performance. The code is close to optimal, it should compile to
251 ** only about 22 MIPS or SPARC instructions. Dividing the block into 4
252 ** bytes and looking up the number of bits of a byte in a table may be 10%
253 ** faster, but only if the table lives in the data cache.
254 **
255 ** At this time (2017) the optimum choice of implementation for this
256 ** function as used seems to be use the gcc builtin on all systems --
257 ** but see the comments below in the documentation of
258 ** 'COUNT_TRUES_BLOCKS'.
259 **
260 */
261 UInt COUNT_TRUES_BLOCK(UInt block);
262
263 /****************************************************************************
264 **
265 *F COUNT_TRUES_BLOCKS( <ptr>, <nblocks> )
266 **
267 ** 'COUNT_TRUES_BLOCKS( <ptr>, <nblocks> )' returns the total number of 1
268 ** bits in the array of UInt values starting at <ptr> and including a total
269 ** of <nblocks> UInts. The only reason this function is really needed is
270 ** that, owing to hardware bugs and compiler peculiarities current in 2017,
271 ** (see https://danluu.com/assembly-intrinsics/ or
272 ** https://stackoverflow.com/questions/25078285?) manually unrolling this
273 ** loop makes the code substantially faster on almost all CPUS.
274 **
275 ** This interacts strangely with the choice of algorithm for
276 ** COUNT_TRUES_BLOCK above. Without the loop unrolling, not using the gcc
277 ** builtin is sometimes faster, apparently because it allows the compiler
278 ** to unroll the loop and then generate SSE or AVX code to process multiple
279 ** words at once. With the loop unrolling the builtin is always faster, and
280 ** will itself generate AVX code when compiling for suitable processors.
281 **
282 ** TODO: monitor this situation periodically.
283 */
284 UInt COUNT_TRUES_BLOCKS(const UInt * ptr, UInt nblocks);
285
286 /****************************************************************************
287 **
288 *F * * * * * * * * * * * * * initialize module * * * * * * * * * * * * * * *
289 */
290
291 /****************************************************************************
292 **
293 *F InitInfoBlist() . . . . . . . . . . . . . . . . . table of init functions
294 */
295 StructInitInfo * InitInfoBlist ( void );
296
297
298 #endif // GAP_BLISTER_H
299