1 /****************************************************************************
2 **
3 **  This file is part of GAP, a system for computational discrete algebra.
4 **
5 **  Copyright of GAP belongs to its developers, whose names are too numerous
6 **  to list here. Please refer to the COPYRIGHT file for details.
7 **
8 **  SPDX-License-Identifier: GPL-2.0-or-later
9 **
10 **  This  file declares the functions  that mainly operate  on boolean lists.
11 **  Because boolean lists are  just a special case  of lists many  things are
12 **  done in the list package.
13 **
14 **  A *boolean list* is a list that has no holes and contains only 'true' and
15 **  'false'.  For  the full definition of  boolean list  see chapter "Boolean
16 **  Lists" in the {\GAP} Manual.  Read  also the section "More  about Boolean
17 **  Lists" about the different internal representations of such lists.
18 */
19 
20 #ifndef GAP_BLISTER_H
21 #define GAP_BLISTER_H
22 
23 #include "bool.h"
24 #include "objects.h"
25 
26 /****************************************************************************
27 **
28 *F  IS_BLIST_REP( <list> )  . . . . .  check if <list> is in boolean list rep
29 */
IS_BLIST_REP(Obj list)30 EXPORT_INLINE Int IS_BLIST_REP(Obj list)
31 {
32     return T_BLIST <= TNUM_OBJ(list) &&
33            TNUM_OBJ(list) <= T_BLIST_SSORT + IMMUTABLE;
34 }
35 
36 
37 /****************************************************************************
38 **
39 *F  SIZE_PLEN_BLIST( <plen> ) . . size for a blist with given physical length
40 **
41 **  'SIZE_PLEN_BLIST' returns  the size  that a boolean list  with  room  for
42 **  <plen> elements must at least have.
43 **
44 */
SIZE_PLEN_BLIST(Int plen)45 EXPORT_INLINE Int SIZE_PLEN_BLIST(Int plen)
46 {
47     GAP_ASSERT(plen >= 0);
48     return sizeof(Obj) + (plen + BIPEB - 1) / BIPEB * sizeof(UInt);
49 }
50 
51 /****************************************************************************
52 **
53 *F  LEN_BLIST( <list> ) . . . . . . . . . . . . . .  length of a boolean list
54 **
55 **  'LEN_BLIST' returns the logical length of the boolean list <list>, as a C
56 **  integer.
57 **
58 */
LEN_BLIST(Obj list)59 EXPORT_INLINE Int LEN_BLIST(Obj list)
60 {
61     GAP_ASSERT(IS_BLIST_REP(list));
62     return INT_INTOBJ(CONST_ADDR_OBJ(list)[0]);
63 }
64 
65 
66 /***************************************************************************
67 **
68 *F  NUMBER_BLOCKS_BLIST(<list>) . . . . . . . . number of UInt blocks in list
69 **
70 */
NUMBER_BLOCKS_BLIST(Obj blist)71 EXPORT_INLINE Int NUMBER_BLOCKS_BLIST(Obj blist)
72 {
73     GAP_ASSERT(IS_BLIST_REP(blist));
74     return (LEN_BLIST(blist) + BIPEB - 1) / BIPEB;
75 }
76 
77 
78 /****************************************************************************
79 **
80 *F  SET_LEN_BLIST( <list>, <len> )  . . . .  set the length of a boolean list
81 **
82 **  'SET_LEN_BLIST' sets the  length of the boolean list  <list> to the value
83 **  <len>, which must be a positive C integer.
84 **
85 */
SET_LEN_BLIST(Obj list,Int len)86 EXPORT_INLINE void SET_LEN_BLIST(Obj list, Int len)
87 {
88     GAP_ASSERT(IS_BLIST_REP(list));
89     GAP_ASSERT(len >= 0);
90     ADDR_OBJ(list)[0] = INTOBJ_INT(len);
91 }
92 
93 
94 /****************************************************************************
95 **
96 *F  BLOCKS_BLIST( <list> )  . . . . . . . . . . first block of a boolean list
97 **
98 **  returns a pointer to the start of the data of the Boolean list
99 **
100 */
BLOCKS_BLIST(Obj list)101 EXPORT_INLINE UInt * BLOCKS_BLIST(Obj list)
102 {
103     return ((UInt *)(ADDR_OBJ(list) + 1));
104 }
105 
CONST_BLOCKS_BLIST(Obj list)106 EXPORT_INLINE const UInt * CONST_BLOCKS_BLIST(Obj list)
107 {
108     return ((const UInt *)(CONST_ADDR_OBJ(list) + 1));
109 }
110 
111 /****************************************************************************
112 **
113 *F  BLOCK_ELM_BLIST_PTR( <list>, <pos> ) . . ptr to a block of a boolean list
114 **
115 **  'BLOCK_ELM_BLIST_PTR' return a pointer to the block containing the
116 **  <pos>-th element of the boolean list <list>. <pos> must be a positive
117 **  integer less than or equal to the length of <list>.
118 */
BLOCK_ELM_BLIST_PTR(Obj list,UInt pos)119 EXPORT_INLINE UInt * BLOCK_ELM_BLIST_PTR(Obj list, UInt pos)
120 {
121     return BLOCKS_BLIST(list) + ((pos)-1) / BIPEB;
122 }
123 
CONST_BLOCK_ELM_BLIST_PTR(Obj list,UInt pos)124 EXPORT_INLINE const UInt * CONST_BLOCK_ELM_BLIST_PTR(Obj list, UInt pos)
125 {
126     return CONST_BLOCKS_BLIST(list) + ((pos)-1) / BIPEB;
127 }
128 
129 /****************************************************************************
130 **
131 *F  MASK_POS_BLIST( <pos> )  . . . .  bit mask for position of a Boolean list
132 **
133 **  'MASK_POS_BLIST(<pos>)' returns a UInt with a single set bit in  position
134 **  '(<pos>-1) % BIPEB',
135 **  useful for accessing the <pos>-th element of a blist.
136 */
MASK_POS_BLIST(UInt pos)137 EXPORT_INLINE UInt MASK_POS_BLIST(UInt pos)
138 {
139     return ((UInt)1) << (pos - 1) % BIPEB;
140 }
141 
142 /****************************************************************************
143 **
144 *F  TEST_BIT_BLIST( <list>, <pos> ) . . . . . .  test a bit of a boolean list
145 **
146 **  'TEST_BIT_BLIST' return a non-zero value if the <pos>-th element of the
147 **  boolean list <list> is 1, and otherwise 0. <pos> must be a positive
148 **  integer less than or equal to the length of <list>.
149 */
TEST_BIT_BLIST(Obj list,UInt pos)150 EXPORT_INLINE Int TEST_BIT_BLIST(Obj list, UInt pos)
151 {
152     return *CONST_BLOCK_ELM_BLIST_PTR(list, pos) & MASK_POS_BLIST(pos);
153 }
154 
155 /****************************************************************************
156 **
157 *F  ELM_BLIST( <list>, <pos> ) . . . . . . . . . .  element of a boolean list
158 **
159 **  'ELM_BLIST' return the <pos>-th bit of the boolean list <list>, which is
160 **  either 'true' or 'false'.  <pos> must  be a positive integer less than or
161 **  equal to the length of <list>.
162 */
ELM_BLIST(Obj list,UInt pos)163 EXPORT_INLINE Obj ELM_BLIST(Obj list, UInt pos)
164 {
165     return TEST_BIT_BLIST(list, pos) ? True : False;
166 }
167 
168 /****************************************************************************
169 **
170 *F  SET_BIT_BLIST( <list>, <pos> ) . . . . . . .  set a bit of a boolean list
171 *F  CLEAR_BIT_BLIST( <list>, <pos> ) . . . . . clears a bit of a boolean list
172 **
173 **  These function set the bit at position <pos> in the boolean list <list>
174 **  to 1 resp. 0.  <pos> must be a positive integer less than or equal to
175 **  the length of <list>.
176 */
SET_BIT_BLIST(Obj list,UInt pos)177 EXPORT_INLINE void SET_BIT_BLIST(Obj list, UInt pos)
178 {
179     *BLOCK_ELM_BLIST_PTR(list, pos) |= MASK_POS_BLIST(pos);
180 }
181 
CLEAR_BIT_BLIST(Obj list,UInt pos)182 EXPORT_INLINE void CLEAR_BIT_BLIST(Obj list, UInt pos)
183 {
184     *BLOCK_ELM_BLIST_PTR(list, pos) &= ~MASK_POS_BLIST(pos);
185 }
186 
187 
188 /****************************************************************************
189 **
190 *F * * * * * * * * * * * * * * list functions * * * * * * * * * * * * * * * *
191 */
192 
193 /****************************************************************************
194 **
195 *F  AssBlist( <list>, <pos>, <val> )  . . . . . . .  assign to a boolean list
196 **
197 **  'AssBlist' assigns the   value <val> to  the  boolean list <list> at  the
198 **  position <pos>.   It is the responsibility  of the caller to  ensure that
199 **  <pos> is positive, and that <val> is not 0.
200 **
201 **  'AssBlist' is the function in 'AssListFuncs' for boolean lists.
202 **
203 **  If <pos> is less than or equal to the logical length  of the boolean list
204 **  and <val> is 'true' or   'false' the assignment  is  done by setting  the
205 **  corresponding bit.  If <pos>  is one more  than the logical length of the
206 **  boolean list  the assignment is  done by   resizing  the boolean list  if
207 **  necessary, setting the   corresponding bit and  incrementing  the logical
208 **  length  by one.  Otherwise  the boolean list is  converted to an ordinary
209 **  list and the assignment is performed the ordinary way.
210 */
211 void AssBlist(Obj list, Int pos, Obj val);
212 
213 
214 /****************************************************************************
215 **
216 *F  ConvBlist( <list> ) . . . . . . . . .  convert a list into a boolean list
217 **
218 **  'ConvBlist' changes the representation of boolean  lists into the compact
219 **  representation of type 'T_BLIST' described above.
220 */
221 void ConvBlist(Obj list);
222 
223 
224 /****************************************************************************
225 **
226 *F  COUNT_TRUES_BLOCK( <block> ) . . . . . . . . . . .  count number of trues
227 **
228 **  'COUNT_TRUES_BLOCK( <block> )' returns the number of 1 bits in the
229 **  UInt <block>. Two implementations are included below. One uses the
230 **  gcc builtin __builtin_popcount which usually generates the popcntl
231 **  or popcntq instruction on sufficiently recent CPUs. The other uses
232 **  the algorithm described in the original comment below:
233 **
234 **  The sequence to compute the  number of bits in  a block is quite  clever.
235 **  The idea is that after the <i>-th instruction each subblock of $2^i$ bits
236 **  holds the number of   bits of this  subblock  in the original block  <m>.
237 **  This is illustrated in the example below for a block of with 8 bits:
238 **
239 **       // a b c d e f g h
240 **      m = (m & 0x55)       +  ((m >> 1) & 0x55);
241 **       // . b . d . f . h  +  . a . c . e . g   =  a+b c+d e+f g+h
242 **      m = (m & 0x33)       +  ((m >> 2) & 0x33);
243 **       // . . c+d . . g+h  +  . . a+b . . e+f   =  a+b+c+d e+f+g+h
244 **      m = (m & 0x0f)       +  ((m >> 4) & 0x0f);
245 **       // . . . . e+f+g+h  +  . . . . a+b+c+d   =  a+b+c+d+e+f+g+h
246 **
247 **  In the actual  code  some unnecessary mask  have  been removed, improving
248 **  performance quite a bit,  because masks are 32  bit immediate values  for
249 **  which most RISC  processors need two  instructions to load them.  Talking
250 **  about performance.  The code is  close to optimal,  it should compile  to
251 **  only about  22 MIPS  or SPARC instructions.   Dividing the  block into  4
252 **  bytes and looking up the number of bits  of a byte in a  table may be 10%
253 **  faster, but only if the table lives in the data cache.
254 **
255 **  At this time (2017) the optimum choice of implementation for this
256 **  function as used seems to be use the gcc builtin on all systems --
257 **  but see the comments below in the documentation of
258 **  'COUNT_TRUES_BLOCKS'.
259 **
260 */
261 UInt COUNT_TRUES_BLOCK(UInt block);
262 
263 /****************************************************************************
264 **
265 *F  COUNT_TRUES_BLOCKS( <ptr>, <nblocks> )
266 **
267 **  'COUNT_TRUES_BLOCKS( <ptr>, <nblocks> )' returns the total number of 1
268 **  bits in the array of UInt values starting at <ptr> and including a total
269 **  of <nblocks> UInts. The only reason this function is really needed is
270 **  that, owing to hardware bugs and compiler peculiarities current in 2017,
271 **  (see https://danluu.com/assembly-intrinsics/ or
272 **  https://stackoverflow.com/questions/25078285?) manually unrolling this
273 **  loop makes the code substantially faster on almost all CPUS.
274 **
275 **  This interacts strangely with the choice of algorithm for
276 **  COUNT_TRUES_BLOCK above. Without the loop unrolling, not using the gcc
277 **  builtin is sometimes faster, apparently because it allows the compiler
278 **  to unroll the loop and then generate SSE or AVX code to process multiple
279 **  words at once. With the loop unrolling the builtin is always faster, and
280 **  will itself generate AVX code when compiling for suitable processors.
281 **
282 **  TODO: monitor this situation periodically.
283 */
284 UInt COUNT_TRUES_BLOCKS(const UInt * ptr, UInt nblocks);
285 
286 /****************************************************************************
287 **
288 *F * * * * * * * * * * * * * initialize module * * * * * * * * * * * * * * *
289 */
290 
291 /****************************************************************************
292 **
293 *F  InitInfoBlist() . . . . . . . . . . . . . . . . . table of init functions
294 */
295 StructInitInfo * InitInfoBlist ( void );
296 
297 
298 #endif // GAP_BLISTER_H
299