1 /* Generate random permutations.
2 
3    Copyright (C) 2006-2018 Free Software Foundation, Inc.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation, either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 
18 /* Written by Paul Eggert.  */
19 
20 #include <config.h>
21 
22 #include "hash.h"
23 #include "randperm.h"
24 
25 #include <limits.h>
26 #include <stdlib.h>
27 
28 #include "xalloc.h"
29 
30 /* Return the ceiling of the log base 2 of N.  If N is zero, return
31    an unspecified value.  */
32 
33 static size_t _GL_ATTRIBUTE_CONST
ceil_lg(size_t n)34 ceil_lg (size_t n)
35 {
36   size_t b = 0;
37   for (n--; n != 0; n /= 2)
38     b++;
39   return b;
40 }
41 
42 /* Return an upper bound on the number of random bytes needed to
43    generate the first H elements of a random permutation of N
44    elements.  H must not exceed N.  */
45 
46 size_t
randperm_bound(size_t h,size_t n)47 randperm_bound (size_t h, size_t n)
48 {
49   /* Upper bound on number of bits needed to generate the first number
50      of the permutation.  */
51   size_t lg_n = ceil_lg (n);
52 
53   /* Upper bound on number of bits needed to generated the first H elements.  */
54   size_t ar = lg_n * h;
55 
56   /* Convert the bit count to a byte count.  */
57   size_t bound = (ar + CHAR_BIT - 1) / CHAR_BIT;
58 
59   return bound;
60 }
61 
62 /* Swap elements I and J in array V.  */
63 
64 static void
swap(size_t * v,size_t i,size_t j)65 swap (size_t *v, size_t i, size_t j)
66 {
67   size_t t = v[i];
68   v[i] = v[j];
69   v[j] = t;
70 }
71 
72 /* Structures and functions for a sparse_map abstract data type that's
73    used to effectively swap elements I and J in array V like swap(),
74    but in a more memory efficient manner (when the number of permutations
75    performed is significantly less than the size of the input).  */
76 
77 struct sparse_ent_
78 {
79    size_t index;
80    size_t val;
81 };
82 
83 static size_t
sparse_hash_(void const * x,size_t table_size)84 sparse_hash_ (void const *x, size_t table_size)
85 {
86   struct sparse_ent_ const *ent = x;
87   return ent->index % table_size;
88 }
89 
90 static bool
sparse_cmp_(void const * x,void const * y)91 sparse_cmp_ (void const *x, void const *y)
92 {
93   struct sparse_ent_ const *ent1 = x;
94   struct sparse_ent_ const *ent2 = y;
95   return ent1->index == ent2->index;
96 }
97 
98 typedef Hash_table sparse_map;
99 
100 /* Initialize the structure for the sparse map,
101    when a best guess as to the number of entries
102    specified with SIZE_HINT.  */
103 
104 static sparse_map *
sparse_new(size_t size_hint)105 sparse_new (size_t size_hint)
106 {
107   return hash_initialize (size_hint, NULL, sparse_hash_, sparse_cmp_, free);
108 }
109 
110 /* Swap the values for I and J.  If a value is not already present
111    then assume it's equal to the index.  Update the value for
112    index I in array V.  */
113 
114 static void
sparse_swap(sparse_map * sv,size_t * v,size_t i,size_t j)115 sparse_swap (sparse_map *sv, size_t* v, size_t i, size_t j)
116 {
117   struct sparse_ent_ *v1 = hash_delete (sv, &(struct sparse_ent_) {i,0});
118   struct sparse_ent_ *v2 = hash_delete (sv, &(struct sparse_ent_) {j,0});
119 
120   /* FIXME: reduce the frequency of these mallocs.  */
121   if (!v1)
122     {
123       v1 = xmalloc (sizeof *v1);
124       v1->index = v1->val = i;
125     }
126   if (!v2)
127     {
128       v2 = xmalloc (sizeof *v2);
129       v2->index = v2->val = j;
130     }
131 
132   size_t t = v1->val;
133   v1->val = v2->val;
134   v2->val = t;
135   if (!hash_insert (sv, v1))
136     xalloc_die ();
137   if (!hash_insert (sv, v2))
138     xalloc_die ();
139 
140   v[i] = v1->val;
141 }
142 
143 static void
sparse_free(sparse_map * sv)144 sparse_free (sparse_map *sv)
145 {
146   hash_free (sv);
147 }
148 
149 
150 /* From R, allocate and return a malloc'd array of the first H elements
151    of a random permutation of N elements.  H must not exceed N.
152    Return NULL if H is zero.  */
153 
154 size_t *
randperm_new(struct randint_source * r,size_t h,size_t n)155 randperm_new (struct randint_source *r, size_t h, size_t n)
156 {
157   size_t *v;
158 
159   switch (h)
160     {
161     case 0:
162       v = NULL;
163       break;
164 
165     case 1:
166       v = xmalloc (sizeof *v);
167       v[0] = randint_choose (r, n);
168       break;
169 
170     default:
171       {
172         /* The algorithm is essentially the same in both
173            the sparse and non sparse case.  In the sparse case we use
174            a hash to implement sparse storage for the set of n numbers
175            we're shuffling.  When to use the sparse method was
176            determined with the help of this script:
177 
178            #!/bin/sh
179            for n in $(seq 2 32); do
180              for h in $(seq 2 32); do
181                test $h -gt $n && continue
182                for s in o n; do
183                  test $s = o && shuf=shuf || shuf=./shuf
184                  num=$(env time -f "$s:${h},${n} = %e,%M" \
185                        $shuf -i0-$((2**$n-2)) -n$((2**$h-2)) | wc -l)
186                  test $num = $((2**$h-2)) || echo "$s:${h},${n} = failed" >&2
187                done
188              done
189            done
190 
191            This showed that if sparseness = n/h, then:
192 
193            sparseness = 128 => .125 mem used, and about same speed
194            sparseness =  64 => .25  mem used, but 1.5 times slower
195            sparseness =  32 => .5   mem used, but 2 times slower
196 
197            Also the memory usage was only significant when n > 128Ki
198         */
199         bool sparse = (n >= (128 * 1024)) && (n / h >= 32);
200 
201         size_t i;
202         sparse_map *sv;
203 
204         if (sparse)
205           {
206             sv = sparse_new (h * 2);
207             if (sv == NULL)
208               xalloc_die ();
209             v = xnmalloc (h, sizeof *v);
210           }
211         else
212           {
213             sv = NULL; /* To placate GCC's -Wuninitialized.  */
214             v = xnmalloc (n, sizeof *v);
215             for (i = 0; i < n; i++)
216               v[i] = i;
217           }
218 
219         for (i = 0; i < h; i++)
220           {
221             size_t j = i + randint_choose (r, n - i);
222             if (sparse)
223               sparse_swap (sv, v, i, j);
224             else
225               swap (v, i, j);
226           }
227 
228         if (sparse)
229           sparse_free (sv);
230         else
231           v = xnrealloc (v, h, sizeof *v);
232       }
233       break;
234     }
235 
236   return v;
237 }
238