1 // Copyright 2018 Developers of the Rand project.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8
9 //! Low-level API for sampling indices
10
11 #[cfg(feature = "alloc")] use core::slice;
12
13 #[cfg(feature = "alloc")] use alloc::vec::{self, Vec};
14 // BTreeMap is not as fast in tests, but better than nothing.
15 #[cfg(all(feature = "alloc", not(feature = "std")))]
16 use alloc::collections::BTreeSet;
17 #[cfg(feature = "std")] use std::collections::HashSet;
18
19 #[cfg(feature = "alloc")]
20 use crate::distributions::{uniform::SampleUniform, Distribution, Uniform, WeightedError};
21 use crate::Rng;
22
23 #[cfg(feature = "serde1")]
24 use serde::{Serialize, Deserialize};
25
26 /// A vector of indices.
27 ///
28 /// Multiple internal representations are possible.
29 #[derive(Clone, Debug)]
30 #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
31 pub enum IndexVec {
32 #[doc(hidden)]
33 U32(Vec<u32>),
34 #[doc(hidden)]
35 USize(Vec<usize>),
36 }
37
38 impl IndexVec {
39 /// Returns the number of indices
40 #[inline]
len(&self) -> usize41 pub fn len(&self) -> usize {
42 match *self {
43 IndexVec::U32(ref v) => v.len(),
44 IndexVec::USize(ref v) => v.len(),
45 }
46 }
47
48 /// Returns `true` if the length is 0.
49 #[inline]
is_empty(&self) -> bool50 pub fn is_empty(&self) -> bool {
51 match *self {
52 IndexVec::U32(ref v) => v.is_empty(),
53 IndexVec::USize(ref v) => v.is_empty(),
54 }
55 }
56
57 /// Return the value at the given `index`.
58 ///
59 /// (Note: we cannot implement [`std::ops::Index`] because of lifetime
60 /// restrictions.)
61 #[inline]
index(&self, index: usize) -> usize62 pub fn index(&self, index: usize) -> usize {
63 match *self {
64 IndexVec::U32(ref v) => v[index] as usize,
65 IndexVec::USize(ref v) => v[index],
66 }
67 }
68
69 /// Return result as a `Vec<usize>`. Conversion may or may not be trivial.
70 #[inline]
into_vec(self) -> Vec<usize>71 pub fn into_vec(self) -> Vec<usize> {
72 match self {
73 IndexVec::U32(v) => v.into_iter().map(|i| i as usize).collect(),
74 IndexVec::USize(v) => v,
75 }
76 }
77
78 /// Iterate over the indices as a sequence of `usize` values
79 #[inline]
iter(&self) -> IndexVecIter<'_>80 pub fn iter(&self) -> IndexVecIter<'_> {
81 match *self {
82 IndexVec::U32(ref v) => IndexVecIter::U32(v.iter()),
83 IndexVec::USize(ref v) => IndexVecIter::USize(v.iter()),
84 }
85 }
86 }
87
88 impl IntoIterator for IndexVec {
89 type Item = usize;
90 type IntoIter = IndexVecIntoIter;
91
92 /// Convert into an iterator over the indices as a sequence of `usize` values
93 #[inline]
into_iter(self) -> IndexVecIntoIter94 fn into_iter(self) -> IndexVecIntoIter {
95 match self {
96 IndexVec::U32(v) => IndexVecIntoIter::U32(v.into_iter()),
97 IndexVec::USize(v) => IndexVecIntoIter::USize(v.into_iter()),
98 }
99 }
100 }
101
102 impl PartialEq for IndexVec {
eq(&self, other: &IndexVec) -> bool103 fn eq(&self, other: &IndexVec) -> bool {
104 use self::IndexVec::*;
105 match (self, other) {
106 (&U32(ref v1), &U32(ref v2)) => v1 == v2,
107 (&USize(ref v1), &USize(ref v2)) => v1 == v2,
108 (&U32(ref v1), &USize(ref v2)) => {
109 (v1.len() == v2.len()) && (v1.iter().zip(v2.iter()).all(|(x, y)| *x as usize == *y))
110 }
111 (&USize(ref v1), &U32(ref v2)) => {
112 (v1.len() == v2.len()) && (v1.iter().zip(v2.iter()).all(|(x, y)| *x == *y as usize))
113 }
114 }
115 }
116 }
117
118 impl From<Vec<u32>> for IndexVec {
119 #[inline]
from(v: Vec<u32>) -> Self120 fn from(v: Vec<u32>) -> Self {
121 IndexVec::U32(v)
122 }
123 }
124
125 impl From<Vec<usize>> for IndexVec {
126 #[inline]
from(v: Vec<usize>) -> Self127 fn from(v: Vec<usize>) -> Self {
128 IndexVec::USize(v)
129 }
130 }
131
132 /// Return type of `IndexVec::iter`.
133 #[derive(Debug)]
134 pub enum IndexVecIter<'a> {
135 #[doc(hidden)]
136 U32(slice::Iter<'a, u32>),
137 #[doc(hidden)]
138 USize(slice::Iter<'a, usize>),
139 }
140
141 impl<'a> Iterator for IndexVecIter<'a> {
142 type Item = usize;
143
144 #[inline]
next(&mut self) -> Option<usize>145 fn next(&mut self) -> Option<usize> {
146 use self::IndexVecIter::*;
147 match *self {
148 U32(ref mut iter) => iter.next().map(|i| *i as usize),
149 USize(ref mut iter) => iter.next().cloned(),
150 }
151 }
152
153 #[inline]
size_hint(&self) -> (usize, Option<usize>)154 fn size_hint(&self) -> (usize, Option<usize>) {
155 match *self {
156 IndexVecIter::U32(ref v) => v.size_hint(),
157 IndexVecIter::USize(ref v) => v.size_hint(),
158 }
159 }
160 }
161
162 impl<'a> ExactSizeIterator for IndexVecIter<'a> {}
163
164 /// Return type of `IndexVec::into_iter`.
165 #[derive(Clone, Debug)]
166 pub enum IndexVecIntoIter {
167 #[doc(hidden)]
168 U32(vec::IntoIter<u32>),
169 #[doc(hidden)]
170 USize(vec::IntoIter<usize>),
171 }
172
173 impl Iterator for IndexVecIntoIter {
174 type Item = usize;
175
176 #[inline]
next(&mut self) -> Option<Self::Item>177 fn next(&mut self) -> Option<Self::Item> {
178 use self::IndexVecIntoIter::*;
179 match *self {
180 U32(ref mut v) => v.next().map(|i| i as usize),
181 USize(ref mut v) => v.next(),
182 }
183 }
184
185 #[inline]
size_hint(&self) -> (usize, Option<usize>)186 fn size_hint(&self) -> (usize, Option<usize>) {
187 use self::IndexVecIntoIter::*;
188 match *self {
189 U32(ref v) => v.size_hint(),
190 USize(ref v) => v.size_hint(),
191 }
192 }
193 }
194
195 impl ExactSizeIterator for IndexVecIntoIter {}
196
197
198 /// Randomly sample exactly `amount` distinct indices from `0..length`, and
199 /// return them in random order (fully shuffled).
200 ///
201 /// This method is used internally by the slice sampling methods, but it can
202 /// sometimes be useful to have the indices themselves so this is provided as
203 /// an alternative.
204 ///
205 /// The implementation used is not specified; we automatically select the
206 /// fastest available algorithm for the `length` and `amount` parameters
207 /// (based on detailed profiling on an Intel Haswell CPU). Roughly speaking,
208 /// complexity is `O(amount)`, except that when `amount` is small, performance
209 /// is closer to `O(amount^2)`, and when `length` is close to `amount` then
210 /// `O(length)`.
211 ///
212 /// Note that performance is significantly better over `u32` indices than over
213 /// `u64` indices. Because of this we hide the underlying type behind an
214 /// abstraction, `IndexVec`.
215 ///
216 /// If an allocation-free `no_std` function is required, it is suggested
217 /// to adapt the internal `sample_floyd` implementation.
218 ///
219 /// Panics if `amount > length`.
sample<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec where R: Rng + ?Sized220 pub fn sample<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec
221 where R: Rng + ?Sized {
222 if amount > length {
223 panic!("`amount` of samples must be less than or equal to `length`");
224 }
225 if length > (::core::u32::MAX as usize) {
226 // We never want to use inplace here, but could use floyd's alg
227 // Lazy version: always use the cache alg.
228 return sample_rejection(rng, length, amount);
229 }
230 let amount = amount as u32;
231 let length = length as u32;
232
233 // Choice of algorithm here depends on both length and amount. See:
234 // https://github.com/rust-random/rand/pull/479
235 // We do some calculations with f32. Accuracy is not very important.
236
237 if amount < 163 {
238 const C: [[f32; 2]; 2] = [[1.6, 8.0 / 45.0], [10.0, 70.0 / 9.0]];
239 let j = if length < 500_000 { 0 } else { 1 };
240 let amount_fp = amount as f32;
241 let m4 = C[0][j] * amount_fp;
242 // Short-cut: when amount < 12, floyd's is always faster
243 if amount > 11 && (length as f32) < (C[1][j] + m4) * amount_fp {
244 sample_inplace(rng, length, amount)
245 } else {
246 sample_floyd(rng, length, amount)
247 }
248 } else {
249 const C: [f32; 2] = [270.0, 330.0 / 9.0];
250 let j = if length < 500_000 { 0 } else { 1 };
251 if (length as f32) < C[j] * (amount as f32) {
252 sample_inplace(rng, length, amount)
253 } else {
254 sample_rejection(rng, length, amount)
255 }
256 }
257 }
258
259 /// Randomly sample exactly `amount` distinct indices from `0..length`, and
260 /// return them in an arbitrary order (there is no guarantee of shuffling or
261 /// ordering). The weights are to be provided by the input function `weights`,
262 /// which will be called once for each index.
263 ///
264 /// This method is used internally by the slice sampling methods, but it can
265 /// sometimes be useful to have the indices themselves so this is provided as
266 /// an alternative.
267 ///
268 /// This implementation uses `O(length + amount)` space and `O(length)` time
269 /// if the "nightly" feature is enabled, or `O(length)` space and
270 /// `O(length + amount * log length)` time otherwise.
271 ///
272 /// Panics if `amount > length`.
sample_weighted<R, F, X>( rng: &mut R, length: usize, weight: F, amount: usize, ) -> Result<IndexVec, WeightedError> where R: Rng + ?Sized, F: Fn(usize) -> X, X: Into<f64>,273 pub fn sample_weighted<R, F, X>(
274 rng: &mut R, length: usize, weight: F, amount: usize,
275 ) -> Result<IndexVec, WeightedError>
276 where
277 R: Rng + ?Sized,
278 F: Fn(usize) -> X,
279 X: Into<f64>,
280 {
281 if length > (core::u32::MAX as usize) {
282 sample_efraimidis_spirakis(rng, length, weight, amount)
283 } else {
284 assert!(amount <= core::u32::MAX as usize);
285 let amount = amount as u32;
286 let length = length as u32;
287 sample_efraimidis_spirakis(rng, length, weight, amount)
288 }
289 }
290
291
292 /// Randomly sample exactly `amount` distinct indices from `0..length`, and
293 /// return them in an arbitrary order (there is no guarantee of shuffling or
294 /// ordering). The weights are to be provided by the input function `weights`,
295 /// which will be called once for each index.
296 ///
297 /// This implementation uses the algorithm described by Efraimidis and Spirakis
298 /// in this paper: https://doi.org/10.1016/j.ipl.2005.11.003
299 /// It uses `O(length + amount)` space and `O(length)` time if the
300 /// "nightly" feature is enabled, or `O(length)` space and `O(length
301 /// + amount * log length)` time otherwise.
302 ///
303 /// Panics if `amount > length`.
sample_efraimidis_spirakis<R, F, X, N>( rng: &mut R, length: N, weight: F, amount: N, ) -> Result<IndexVec, WeightedError> where R: Rng + ?Sized, F: Fn(usize) -> X, X: Into<f64>, N: UInt, IndexVec: From<Vec<N>>,304 fn sample_efraimidis_spirakis<R, F, X, N>(
305 rng: &mut R, length: N, weight: F, amount: N,
306 ) -> Result<IndexVec, WeightedError>
307 where
308 R: Rng + ?Sized,
309 F: Fn(usize) -> X,
310 X: Into<f64>,
311 N: UInt,
312 IndexVec: From<Vec<N>>,
313 {
314 if amount == N::zero() {
315 return Ok(IndexVec::U32(Vec::new()));
316 }
317
318 if amount > length {
319 panic!("`amount` of samples must be less than or equal to `length`");
320 }
321
322 struct Element<N> {
323 index: N,
324 key: f64,
325 }
326 impl<N> PartialOrd for Element<N> {
327 fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
328 self.key.partial_cmp(&other.key)
329 }
330 }
331 impl<N> Ord for Element<N> {
332 fn cmp(&self, other: &Self) -> core::cmp::Ordering {
333 // partial_cmp will always produce a value,
334 // because we check that the weights are not nan
335 self.partial_cmp(other).unwrap()
336 }
337 }
338 impl<N> PartialEq for Element<N> {
339 fn eq(&self, other: &Self) -> bool {
340 self.key == other.key
341 }
342 }
343 impl<N> Eq for Element<N> {}
344
345 #[cfg(feature = "nightly")]
346 {
347 let mut candidates = Vec::with_capacity(length.as_usize());
348 let mut index = N::zero();
349 while index < length {
350 let weight = weight(index.as_usize()).into();
351 if !(weight >= 0.) {
352 return Err(WeightedError::InvalidWeight);
353 }
354
355 let key = rng.gen::<f64>().powf(1.0 / weight);
356 candidates.push(Element { index, key });
357
358 index += N::one();
359 }
360
361 // Partially sort the array to find the `amount` elements with the greatest
362 // keys. Do this by using `select_nth_unstable` to put the elements with
363 // the *smallest* keys at the beginning of the list in `O(n)` time, which
364 // provides equivalent information about the elements with the *greatest* keys.
365 let (_, mid, greater)
366 = candidates.select_nth_unstable(length.as_usize() - amount.as_usize());
367
368 let mut result: Vec<N> = Vec::with_capacity(amount.as_usize());
369 result.push(mid.index);
370 for element in greater {
371 result.push(element.index);
372 }
373 Ok(IndexVec::from(result))
374 }
375
376 #[cfg(not(feature = "nightly"))]
377 {
378 #[cfg(all(feature = "alloc", not(feature = "std")))]
379 use crate::alloc::collections::BinaryHeap;
380 #[cfg(feature = "std")]
381 use std::collections::BinaryHeap;
382
383 // Partially sort the array such that the `amount` elements with the largest
384 // keys are first using a binary max heap.
385 let mut candidates = BinaryHeap::with_capacity(length.as_usize());
386 let mut index = N::zero();
387 while index < length {
388 let weight = weight(index.as_usize()).into();
389 if !(weight >= 0.) {
390 return Err(WeightedError::InvalidWeight);
391 }
392
393 let key = rng.gen::<f64>().powf(1.0 / weight);
394 candidates.push(Element { index, key });
395
396 index += N::one();
397 }
398
399 let mut result: Vec<N> = Vec::with_capacity(amount.as_usize());
400 while result.len() < amount.as_usize() {
401 result.push(candidates.pop().unwrap().index);
402 }
403 Ok(IndexVec::from(result))
404 }
405 }
406
407 /// Randomly sample exactly `amount` indices from `0..length`, using Floyd's
408 /// combination algorithm.
409 ///
410 /// The output values are fully shuffled. (Overhead is under 50%.)
411 ///
412 /// This implementation uses `O(amount)` memory and `O(amount^2)` time.
sample_floyd<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec where R: Rng + ?Sized413 fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
414 where R: Rng + ?Sized {
415 // For small amount we use Floyd's fully-shuffled variant. For larger
416 // amounts this is slow due to Vec::insert performance, so we shuffle
417 // afterwards. Benchmarks show little overhead from extra logic.
418 let floyd_shuffle = amount < 50;
419
420 debug_assert!(amount <= length);
421 let mut indices = Vec::with_capacity(amount as usize);
422 for j in length - amount..length {
423 let t = rng.gen_range(0..=j);
424 if floyd_shuffle {
425 if let Some(pos) = indices.iter().position(|&x| x == t) {
426 indices.insert(pos, j);
427 continue;
428 }
429 } else if indices.contains(&t) {
430 indices.push(j);
431 continue;
432 }
433 indices.push(t);
434 }
435 if !floyd_shuffle {
436 // Reimplement SliceRandom::shuffle with smaller indices
437 for i in (1..amount).rev() {
438 // invariant: elements with index > i have been locked in place.
439 indices.swap(i as usize, rng.gen_range(0..=i) as usize);
440 }
441 }
442 IndexVec::from(indices)
443 }
444
445 /// Randomly sample exactly `amount` indices from `0..length`, using an inplace
446 /// partial Fisher-Yates method.
447 /// Sample an amount of indices using an inplace partial fisher yates method.
448 ///
449 /// This allocates the entire `length` of indices and randomizes only the first `amount`.
450 /// It then truncates to `amount` and returns.
451 ///
452 /// This method is not appropriate for large `length` and potentially uses a lot
453 /// of memory; because of this we only implement for `u32` index (which improves
454 /// performance in all cases).
455 ///
456 /// Set-up is `O(length)` time and memory and shuffling is `O(amount)` time.
sample_inplace<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec where R: Rng + ?Sized457 fn sample_inplace<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
458 where R: Rng + ?Sized {
459 debug_assert!(amount <= length);
460 let mut indices: Vec<u32> = Vec::with_capacity(length as usize);
461 indices.extend(0..length);
462 for i in 0..amount {
463 let j: u32 = rng.gen_range(i..length);
464 indices.swap(i as usize, j as usize);
465 }
466 indices.truncate(amount as usize);
467 debug_assert_eq!(indices.len(), amount as usize);
468 IndexVec::from(indices)
469 }
470
471 trait UInt: Copy + PartialOrd + Ord + PartialEq + Eq + SampleUniform
472 + core::hash::Hash + core::ops::AddAssign {
zero() -> Self473 fn zero() -> Self;
one() -> Self474 fn one() -> Self;
as_usize(self) -> usize475 fn as_usize(self) -> usize;
476 }
477 impl UInt for u32 {
478 #[inline]
zero() -> Self479 fn zero() -> Self {
480 0
481 }
482
483 #[inline]
one() -> Self484 fn one() -> Self {
485 1
486 }
487
488 #[inline]
as_usize(self) -> usize489 fn as_usize(self) -> usize {
490 self as usize
491 }
492 }
493 impl UInt for usize {
494 #[inline]
zero() -> Self495 fn zero() -> Self {
496 0
497 }
498
499 #[inline]
one() -> Self500 fn one() -> Self {
501 1
502 }
503
504 #[inline]
as_usize(self) -> usize505 fn as_usize(self) -> usize {
506 self
507 }
508 }
509
510 /// Randomly sample exactly `amount` indices from `0..length`, using rejection
511 /// sampling.
512 ///
513 /// Since `amount <<< length` there is a low chance of a random sample in
514 /// `0..length` being a duplicate. We test for duplicates and resample where
515 /// necessary. The algorithm is `O(amount)` time and memory.
516 ///
517 /// This function is generic over X primarily so that results are value-stable
518 /// over 32-bit and 64-bit platforms.
sample_rejection<X: UInt, R>(rng: &mut R, length: X, amount: X) -> IndexVec where R: Rng + ?Sized, IndexVec: From<Vec<X>>,519 fn sample_rejection<X: UInt, R>(rng: &mut R, length: X, amount: X) -> IndexVec
520 where
521 R: Rng + ?Sized,
522 IndexVec: From<Vec<X>>,
523 {
524 debug_assert!(amount < length);
525 #[cfg(feature = "std")]
526 let mut cache = HashSet::with_capacity(amount.as_usize());
527 #[cfg(not(feature = "std"))]
528 let mut cache = BTreeSet::new();
529 let distr = Uniform::new(X::zero(), length);
530 let mut indices = Vec::with_capacity(amount.as_usize());
531 for _ in 0..amount.as_usize() {
532 let mut pos = distr.sample(rng);
533 while !cache.insert(pos) {
534 pos = distr.sample(rng);
535 }
536 indices.push(pos);
537 }
538
539 debug_assert_eq!(indices.len(), amount.as_usize());
540 IndexVec::from(indices)
541 }
542
543 #[cfg(test)]
544 mod test {
545 use super::*;
546
547 #[test]
548 #[cfg(feature = "serde1")]
test_serialization_index_vec()549 fn test_serialization_index_vec() {
550 let some_index_vec = IndexVec::from(vec![254_usize, 234, 2, 1]);
551 let de_some_index_vec: IndexVec = bincode::deserialize(&bincode::serialize(&some_index_vec).unwrap()).unwrap();
552 match (some_index_vec, de_some_index_vec) {
553 (IndexVec::U32(a), IndexVec::U32(b)) => {
554 assert_eq!(a, b);
555 },
556 (IndexVec::USize(a), IndexVec::USize(b)) => {
557 assert_eq!(a, b);
558 },
559 _ => {panic!("failed to seralize/deserialize `IndexVec`")}
560 }
561 }
562
563 #[cfg(feature = "alloc")] use alloc::vec;
564
565 #[test]
test_sample_boundaries()566 fn test_sample_boundaries() {
567 let mut r = crate::test::rng(404);
568
569 assert_eq!(sample_inplace(&mut r, 0, 0).len(), 0);
570 assert_eq!(sample_inplace(&mut r, 1, 0).len(), 0);
571 assert_eq!(sample_inplace(&mut r, 1, 1).into_vec(), vec![0]);
572
573 assert_eq!(sample_rejection(&mut r, 1u32, 0).len(), 0);
574
575 assert_eq!(sample_floyd(&mut r, 0, 0).len(), 0);
576 assert_eq!(sample_floyd(&mut r, 1, 0).len(), 0);
577 assert_eq!(sample_floyd(&mut r, 1, 1).into_vec(), vec![0]);
578
579 // These algorithms should be fast with big numbers. Test average.
580 let sum: usize = sample_rejection(&mut r, 1 << 25, 10u32).into_iter().sum();
581 assert!(1 << 25 < sum && sum < (1 << 25) * 25);
582
583 let sum: usize = sample_floyd(&mut r, 1 << 25, 10).into_iter().sum();
584 assert!(1 << 25 < sum && sum < (1 << 25) * 25);
585 }
586
587 #[test]
588 #[cfg_attr(miri, ignore)] // Miri is too slow
test_sample_alg()589 fn test_sample_alg() {
590 let seed_rng = crate::test::rng;
591
592 // We can't test which algorithm is used directly, but Floyd's alg
593 // should produce different results from the others. (Also, `inplace`
594 // and `cached` currently use different sizes thus produce different results.)
595
596 // A small length and relatively large amount should use inplace
597 let (length, amount): (usize, usize) = (100, 50);
598 let v1 = sample(&mut seed_rng(420), length, amount);
599 let v2 = sample_inplace(&mut seed_rng(420), length as u32, amount as u32);
600 assert!(v1.iter().all(|e| e < length));
601 assert_eq!(v1, v2);
602
603 // Test Floyd's alg does produce different results
604 let v3 = sample_floyd(&mut seed_rng(420), length as u32, amount as u32);
605 assert!(v1 != v3);
606
607 // A large length and small amount should use Floyd
608 let (length, amount): (usize, usize) = (1 << 20, 50);
609 let v1 = sample(&mut seed_rng(421), length, amount);
610 let v2 = sample_floyd(&mut seed_rng(421), length as u32, amount as u32);
611 assert!(v1.iter().all(|e| e < length));
612 assert_eq!(v1, v2);
613
614 // A large length and larger amount should use cache
615 let (length, amount): (usize, usize) = (1 << 20, 600);
616 let v1 = sample(&mut seed_rng(422), length, amount);
617 let v2 = sample_rejection(&mut seed_rng(422), length as u32, amount as u32);
618 assert!(v1.iter().all(|e| e < length));
619 assert_eq!(v1, v2);
620 }
621
622 #[test]
test_sample_weighted()623 fn test_sample_weighted() {
624 let seed_rng = crate::test::rng;
625 for &(amount, len) in &[(0, 10), (5, 10), (10, 10)] {
626 let v = sample_weighted(&mut seed_rng(423), len, |i| i as f64, amount).unwrap();
627 match v {
628 IndexVec::U32(mut indices) => {
629 assert_eq!(indices.len(), amount);
630 indices.sort();
631 indices.dedup();
632 assert_eq!(indices.len(), amount);
633 for &i in &indices {
634 assert!((i as usize) < len);
635 }
636 },
637 IndexVec::USize(_) => panic!("expected `IndexVec::U32`"),
638 }
639 }
640 }
641
642 #[test]
value_stability_sample()643 fn value_stability_sample() {
644 let do_test = |length, amount, values: &[u32]| {
645 let mut buf = [0u32; 8];
646 let mut rng = crate::test::rng(410);
647
648 let res = sample(&mut rng, length, amount);
649 let len = res.len().min(buf.len());
650 for (x, y) in res.into_iter().zip(buf.iter_mut()) {
651 *y = x as u32;
652 }
653 assert_eq!(
654 &buf[0..len],
655 values,
656 "failed sampling {}, {}",
657 length,
658 amount
659 );
660 };
661
662 do_test(10, 6, &[8, 0, 3, 5, 9, 6]); // floyd
663 do_test(25, 10, &[18, 15, 14, 9, 0, 13, 5, 24]); // floyd
664 do_test(300, 8, &[30, 283, 150, 1, 73, 13, 285, 35]); // floyd
665 do_test(300, 80, &[31, 289, 248, 154, 5, 78, 19, 286]); // inplace
666 do_test(300, 180, &[31, 289, 248, 154, 5, 78, 19, 286]); // inplace
667
668 do_test(1000_000, 8, &[
669 103717, 963485, 826422, 509101, 736394, 807035, 5327, 632573,
670 ]); // floyd
671 do_test(1000_000, 180, &[
672 103718, 963490, 826426, 509103, 736396, 807036, 5327, 632573,
673 ]); // rejection
674 }
675 }
676