1 /* Jitter: word set data structure.
2 
3    Copyright (C) 2020 Luca Saiu
4    Written by Luca Saiu
5 
6    This file is part of Jitter.
7 
8    Jitter is free software: you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation, either version 3 of the License, or
11    (at your option) any later version.
12 
13    Jitter is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with Jitter.  If not, see <http://www.gnu.org/licenses/>. */
20 
21 
22 #include <stdio.h>  /* Only for the debugging functions.  I may want to
23                        remove those altogether. */
24 
25 #include <jitter/jitter-word-set.h>
26 
27 #include <jitter/jitter-fatal.h>
28 #include <jitter/jitter-malloc.h>
29 
30 
31 
32 
33 /* Internal functions.
34  * ************************************************************************** */
35 
36 /* Fill the pointed buffer with element_no copies of the unused entry. */
37 static void
jitter_word_set_empty_buffer(jitter_uint * buffer,size_t element_no)38 jitter_word_set_empty_buffer (jitter_uint *buffer, size_t element_no)
39 {
40   /* GCC translates this into a memset call when the number of elements is
41      not statically known.  I guess it is the right choice. */
42   int i;
43   for (i = 0; i < element_no; i ++)
44     buffer [i] = JITTER_WORD_SET_UNUSED;
45 }
46 
47 static jitter_uint *
jitter_word_set_allocate_and_empty_buffer(size_t element_no)48 jitter_word_set_allocate_and_empty_buffer (size_t element_no)
49 {
50   jitter_uint *res = jitter_xmalloc (element_no * sizeof (jitter_uint));
51   jitter_word_set_empty_buffer (res, element_no);
52   return res;
53 }
54 
55 void
jitter_word_set_double(struct jitter_word_set * ps)56 jitter_word_set_double (struct jitter_word_set *ps)
57 {
58   size_t old_element_no = ps->allocated_element_no;
59   size_t new_element_no = old_element_no * 2;
60   jitter_uint *old_buffer = ps->buffer;
61   jitter_uint *new_buffer
62     = jitter_word_set_allocate_and_empty_buffer (new_element_no);
63   ps->mask = (ps->mask << 1) | 1;
64   ps->buffer = new_buffer;
65   ps->allocated_element_no = new_element_no;
66   ps->used_element_limit
67     = JITTER_WORD_SET_ELEMENT_NO_TO_LIMIT (new_element_no);
68   ps->used_element_no = 0;
69   int i;
70   for (i = 0; i < old_element_no; i ++)
71     {
72       jitter_uint p = old_buffer [i];
73       if (JITTER_WORD_SET_IS_VALID (p))
74         {
75           /* Add the element to the new buffer, without using the more
76              convenient macros which rely on a word set structure and
77              automatically resize the buffer. */
78           jitter_uint *pp;
79           size_t probeno __attribute__ ((unused));
80           JITTER_WORD_SET_BUFFER_SEARCH (new_buffer, ps->mask, p,
81                                             false,
82                                             probeno, pp);
83           * pp = p;
84           ps->used_element_no ++;
85         }
86     }
87   free (old_buffer);
88   //printf ("New size: %li (new element limit %li)\n", new_element_no, ps->used_element_limit);
89 }
90 
91 
92 
93 
94 /* Initialisation and finalisation.
95  * ************************************************************************** */
96 
97 /* Perform sanity checks.  This compiles to nothing in correct configuratins, so
98    it is simpler to just call the function from jitter_word_set_initialize
99    instead of providing another annoying global initialisation function to the
100    user. */
101 static void
jitter_word_set_sanity_checks(void)102 jitter_word_set_sanity_checks (void)
103 {
104   if (JITTER_WORD_SET_INITIAL_ELEMENT_NO < 1)
105     jitter_fatal ("JITTER_WORD_SET_INITIAL_ELEMENT_NO is less than 1");
106   if (JITTER_WORD_SET_RECIPROCAL_FILL_RATIO <= 1)
107     jitter_fatal ("JITTER_WORD_SET_RECIPROCAL_FILL_RATIO is less than or "
108                   "equal to 1");
109 }
110 
111 void
jitter_word_set_initialize(struct jitter_word_set * ps)112 jitter_word_set_initialize (struct jitter_word_set *ps)
113 {
114   /* Crash on configuration errors. */
115   jitter_word_set_sanity_checks ();
116 
117   size_t element_no = JITTER_WORD_SET_INITIAL_ELEMENT_NO;
118   jitter_uint index_mask_plus_one = 2;
119   while (index_mask_plus_one < element_no)
120     index_mask_plus_one *= 2;
121   if (index_mask_plus_one != element_no)
122     jitter_fatal ("jitter word set: element no not an even power of two");
123   ps->allocated_element_no = element_no;
124   ps->used_element_limit
125     = JITTER_WORD_SET_ELEMENT_NO_TO_LIMIT (element_no);
126   ps->mask = index_mask_plus_one - 1;
127   /* I want to mask two different sets of bits:
128      - enough bit to cover the number of elements: index_mask_plus_one - 1
129      - the low bits, which are always clear for offsets to aligned elements
130        and are implicitly shifted in or out when converting between offsets
131        and pointers.  Here I want to reason in offsets. */
132   ps->mask
133     = ((ps->mask << JITTER_LG_BYTES_PER_WORD)
134        | JITTER_POINTER_MISALIGNMENT_BITS_MASK);
135 
136   ps->used_element_no = 0;
137   ps->buffer = jitter_word_set_allocate_and_empty_buffer (element_no);
138 }
139 
140 void
jitter_word_set_finalize(struct jitter_word_set * ps)141 jitter_word_set_finalize (struct jitter_word_set *ps)
142 {
143   free (ps->buffer);
144 }
145 
146 
147 
148 
149 /* Emptying and rebuilding.
150  * ************************************************************************** */
151 
152 void
jitter_word_set_clear(struct jitter_word_set * ps)153 jitter_word_set_clear (struct jitter_word_set *ps)
154 {
155   jitter_word_set_empty_buffer (ps->buffer, ps->allocated_element_no);
156   ps->used_element_no = 0;
157 }
158 
159 void
jitter_word_set_clear_and_minimize(struct jitter_word_set * ps)160 jitter_word_set_clear_and_minimize (struct jitter_word_set *ps)
161 {
162   jitter_word_set_finalize (ps);
163   jitter_word_set_initialize (ps);
164 }
165 
166 void
jitter_word_set_clear_and_possibly_minimize(struct jitter_word_set * ps,bool minimize)167 jitter_word_set_clear_and_possibly_minimize (struct jitter_word_set *ps,
168                                                 bool minimize)
169 {
170   /* Differently from jitter_word_set_rebuild_and_possibly_minimize this
171      function does not come out so naturally out of factoring. */
172   if (minimize)
173     jitter_word_set_clear_and_minimize (ps);
174   else
175     jitter_word_set_clear (ps);
176 
177 }
178 
179 /* This factors the common logic of jitter_word_set_rebuild and
180    jitter_word_set_rebuild_and_minimize.  It is also useful on its own. */
181 void
jitter_word_set_rebuild_and_possibly_minimize(struct jitter_word_set * ps,bool minimize)182 jitter_word_set_rebuild_and_possibly_minimize (struct jitter_word_set *ps,
183                                                   bool minimize)
184 {
185   /* Keep a pointer to the old buffer and the number of elements, that we would
186      otherwise lose by modifying *ps . */
187   jitter_uint *old_buffer = ps->buffer;
188   size_t old_allocated_element_no = ps->allocated_element_no;
189 
190   if (minimize)
191     {
192       /* If we are minimising, just re-initialise the structure as if it were
193          fresh.  A new buffer will be allocated, and old_buffer will also remain
194          valid.  This resets the number of used elements, which is what we
195          want. */
196       jitter_word_set_initialize (ps);
197     }
198   else
199     {
200       /* If we are not minimising, replace the buffer in the struct with a new
201          one, and empty the new buffer as well as zeroing the number of
202          entries. */
203       ps->buffer
204         = jitter_xmalloc (ps->allocated_element_no * sizeof (jitter_uint));
205       jitter_word_set_clear (ps);
206     }
207 
208   /* At this point the structure is consistent but empty.  Copy non-unused
209      non-deleted entries from the old buffer into it. */
210   int i;
211   for (i = 0; i < old_allocated_element_no; i ++)
212     if (JITTER_WORD_SET_IS_VALID (old_buffer [i]))
213       JITTER_WORD_SET_ADD_NEW (ps, old_buffer [i]);
214 
215   /* We are done with the old buffer. */
216   free (old_buffer);
217 }
218 
219 void
jitter_word_set_rebuild(struct jitter_word_set * ps)220 jitter_word_set_rebuild (struct jitter_word_set *ps)
221 {
222   jitter_word_set_rebuild_and_possibly_minimize (ps, false);
223 }
224 
225 void
jitter_word_set_rebuild_and_minimize(struct jitter_word_set * ps)226 jitter_word_set_rebuild_and_minimize (struct jitter_word_set *ps)
227 {
228   jitter_word_set_rebuild_and_possibly_minimize (ps, true);
229 }
230 
231 
232 
233 
234 /* Debugging.
235  * ************************************************************************** */
236 
237 /* Some common logic factoring jitter_word_set_print and
238    jitter_word_set_print_statistics . */
239 static void
jitter_word_set_print_possibly_with_statistics(struct jitter_word_set * psp,bool statistics)240 jitter_word_set_print_possibly_with_statistics
241    (struct jitter_word_set *psp, bool statistics)
242 {
243   jitter_uint i;
244   long valid_element_no = 0;
245   long deleted_element_no = 0;
246   long total_element_no = psp->allocated_element_no;
247   double total_probe_no = 0;
248   long min_probe_no __attribute__ ((unused)) = total_element_no;
249   long max_probe_no = 0;
250   for (i = 0; i < total_element_no; i ++)
251     {
252       jitter_uint p = psp->buffer [i];
253       if (! statistics)
254         printf ("%4li. ", (long) i);
255       if (JITTER_WORD_SET_IS_VALID (p))
256         {
257           long probe_no;
258           JITTER_WORD_SET_SET_PROBE_NO (psp, p, probe_no);
259           if (! statistics)
260             printf ("%-18p   probe no %li\n", (void *) p, probe_no);
261           valid_element_no ++;
262           total_probe_no += probe_no;
263           if (probe_no < min_probe_no) min_probe_no = probe_no;
264           if (probe_no > max_probe_no) max_probe_no = probe_no;
265         }
266       else if (p == JITTER_WORD_SET_UNUSED)
267         {
268           if (! statistics)
269             printf ("unused\n");
270         }
271       else if (p == JITTER_WORD_SET_DELETED)
272         {
273           if (! statistics)
274             printf ("deleted\n");
275           deleted_element_no ++;
276         }
277       else
278         jitter_fatal ("impossible");
279     }
280   if (statistics)
281     {
282       if (valid_element_no > 0)
283         {
284           double fill_ratio = ((valid_element_no + deleted_element_no)
285                                / (double) total_element_no);
286           double average_probe_no = total_probe_no / valid_element_no;
287           printf ("elt(val/del/tot) %6li/%li/%-6li "
288                   "fill %4.2f "
289                   "probes(avg/max)%7.3f/%7li\n",
290                   valid_element_no, deleted_element_no, total_element_no,
291                   fill_ratio,
292                   average_probe_no, max_probe_no);
293         }
294       else
295         printf ("empty word set: no statistics\n");
296     }
297 }
298 
299 void
jitter_word_set_print(struct jitter_word_set * psp)300 jitter_word_set_print (struct jitter_word_set *psp)
301 {
302   jitter_word_set_print_possibly_with_statistics (psp, false);
303 }
304 
305 void
jitter_word_set_print_statistics(struct jitter_word_set * psp)306 jitter_word_set_print_statistics (struct jitter_word_set *psp)
307 {
308   jitter_word_set_print_possibly_with_statistics (psp, true);
309 }
310 
311 
312 
313 
314 /* Scratch: code to disassemble, benchmarks and simulations.
315  * ************************************************************************** */
316 
317 __attribute__ ((noclone, noinline))
318 bool
jitter_word_set_test0(jitter_int n)319 jitter_word_set_test0 (jitter_int n)
320 {
321   return ! ! n;
322 }
323 
324 __attribute__ ((noclone, noinline))
325 int /*bool*/
jitter_word_set_test1(struct jitter_word_set * psp,jitter_uint p)326 jitter_word_set_test1 (struct jitter_word_set *psp, jitter_uint p)
327 {
328   int /*bool*/ b;
329   JITTER_WORD_SET_SET_HAS (psp, p, b);
330   return b;
331 }
332 
333 __attribute__ ((noclone, noinline))
334 bool
jitter_word_set_test1b(struct jitter_word_set * psp,jitter_uint p)335 jitter_word_set_test1b (struct jitter_word_set *psp, jitter_uint p)
336 {
337   bool b;
338   JITTER_WORD_SET_SET_HAS (psp, p, b);
339   return b;
340 }
341 
342 __attribute__ ((noclone, noinline))
343 void
jitter_word_set_test2(struct jitter_word_set * psp,jitter_uint p)344 jitter_word_set_test2 (struct jitter_word_set *psp, jitter_uint p)
345 {
346   JITTER_WORD_SET_ADD_NEW (psp, p);
347 }
348 
349 __attribute__ ((noclone, noinline))
350 void
jitter_word_set_test3(struct jitter_word_set * psp,jitter_uint p)351 jitter_word_set_test3 (struct jitter_word_set *psp, jitter_uint p)
352 {
353   JITTER_WORD_SET_ADD_UNIQUE (psp, p);
354 }
355 
356 __attribute__ ((noclone, noinline))
357 void
jitter_word_set_test4(struct jitter_word_set * psp,jitter_uint p)358 jitter_word_set_test4 (struct jitter_word_set *psp, jitter_uint p)
359 {
360   JITTER_WORD_SET_REMOVE (psp, p);
361 }
362 
363 __attribute__ ((noclone, noinline))
364 void
jitter_word_set_test5(struct jitter_word_set * psp,jitter_uint p)365 jitter_word_set_test5 (struct jitter_word_set *psp, jitter_uint p)
366 {
367   int /*bool*/ b;
368   JITTER_WORD_SET_SET_HAS (psp, p, b);
369   if (b)
370     {
371       JITTER_WORD_SET_REMOVE (psp, p);
372     }
373   else
374     {
375       JITTER_WORD_SET_REMOVE (psp, (jitter_uint) psp);
376     }
377 }
378 
379 void
jitter_word_set_test_hash(long random_element_no)380 jitter_word_set_test_hash (long random_element_no)
381 {
382   struct jitter_word_set ps;
383   jitter_word_set_initialize (& ps);
384   long i;
385   for (i = 0; i < random_element_no; /* nothing */)
386     {
387       long remaining_element_no = random_element_no - i;
388 
389       //long sequential_element_no = rand () % remaining_element_no + 1;
390 
391       long sequential_element_no = (long) remaining_element_no * (2. / 3);
392       if (sequential_element_no == 0)
393         sequential_element_no = 1;
394 
395       jitter_uint rn0 = rand ();
396 #if JITTER_BYTES_PER_WORD == 8
397       jitter_uint rn1 = rand ();
398       jitter_uint rn = rn0 << 32 | rn1;
399 #else
400       jitter_uint rn = rn0;
401 #endif
402       rn &= ~ (((jitter_uint) 1 << JITTER_LG_BYTES_PER_WORD) - 1);
403       long j;
404       for (j = 0; j < sequential_element_no; j ++)
405         JITTER_WORD_SET_ADD_UNIQUE (& ps, rn + JITTER_BYTES_PER_WORD * j);
406 
407       i += sequential_element_no;
408     }
409   // jitter_word_set_print (& ps);
410   printf ("%-10li ", random_element_no);
411   jitter_word_set_print_statistics (& ps);
412   jitter_word_set_finalize (& ps);
413 }
414 
415 void
jitter_word_set_word_set_test(void)416 jitter_word_set_word_set_test (void)
417 {
418   unsigned long table_size;
419   for (table_size = 64; table_size < ((unsigned long) 1 << 30); table_size *= 2)
420     {
421       long element_no
422         = (long) ((double) table_size
423                   / JITTER_WORD_SET_RECIPROCAL_FILL_RATIO
424                   - 1);
425       jitter_word_set_test_hash (element_no);
426     }
427 }
428