1 #![doc(html_root_url="https://docs.rs/phf_generator/0.7")]
2 use phf_shared::{PhfHash, HashKey};
3 use rand::{SeedableRng, Rng};
4 use rand::distributions::Standard;
5 use rand::rngs::SmallRng;
6 
7 const DEFAULT_LAMBDA: usize = 5;
8 
9 const FIXED_SEED: u64 = 1234567890;
10 
11 pub struct HashState {
12     pub key: HashKey,
13     pub disps: Vec<(u32, u32)>,
14     pub map: Vec<usize>,
15 }
16 
generate_hash<H: PhfHash>(entries: &[H]) -> HashState17 pub fn generate_hash<H: PhfHash>(entries: &[H]) -> HashState {
18     SmallRng::seed_from_u64(FIXED_SEED)
19         .sample_iter(Standard)
20         .find_map(|key| try_generate_hash(entries, key))
21         .expect("failed to solve PHF")
22 }
23 
try_generate_hash<H: PhfHash>(entries: &[H], key: HashKey) -> Option<HashState>24 fn try_generate_hash<H: PhfHash>(entries: &[H], key: HashKey) -> Option<HashState> {
25     struct Bucket {
26         idx: usize,
27         keys: Vec<usize>,
28     }
29 
30     let hashes: Vec<_> = entries.iter().map(|entry| phf_shared::hash(entry, &key)).collect();
31 
32     let buckets_len = (hashes.len() + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA;
33     let mut buckets = (0..buckets_len)
34                           .map(|i| {
35                               Bucket {
36                                   idx: i,
37                                   keys: vec![],
38                               }
39                           })
40                           .collect::<Vec<_>>();
41 
42     for (i, hash) in hashes.iter().enumerate() {
43         buckets[(hash.g % (buckets_len as u32)) as usize].keys.push(i);
44     }
45 
46     // Sort descending
47     buckets.sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse());
48 
49     let table_len = hashes.len();
50     let mut map = vec![None; table_len];
51     let mut disps = vec![(0u32, 0u32); buckets_len];
52 
53     // store whether an element from the bucket being placed is
54     // located at a certain position, to allow for efficient overlap
55     // checks. It works by storing the generation in each cell and
56     // each new placement-attempt is a new generation, so you can tell
57     // if this is legitimately full by checking that the generations
58     // are equal. (A u64 is far too large to overflow in a reasonable
59     // time for current hardware.)
60     let mut try_map = vec![0u64; table_len];
61     let mut generation = 0u64;
62 
63     // the actual values corresponding to the markers above, as
64     // (index, key) pairs, for adding to the main map once we've
65     // chosen the right disps.
66     let mut values_to_add = vec![];
67 
68     'buckets: for bucket in &buckets {
69         for d1 in 0..(table_len as u32) {
70             'disps: for d2 in 0..(table_len as u32) {
71                 values_to_add.clear();
72                 generation += 1;
73 
74                 for &key in &bucket.keys {
75                     let idx = (phf_shared::displace(hashes[key].f1, hashes[key].f2, d1, d2) %
76                                (table_len as u32)) as usize;
77                     if map[idx].is_some() || try_map[idx] == generation {
78                         continue 'disps;
79                     }
80                     try_map[idx] = generation;
81                     values_to_add.push((idx, key));
82                 }
83 
84                 // We've picked a good set of disps
85                 disps[bucket.idx] = (d1, d2);
86                 for &(idx, key) in &values_to_add {
87                     map[idx] = Some(key);
88                 }
89                 continue 'buckets;
90             }
91         }
92 
93         // Unable to find displacements for a bucket
94         return None;
95     }
96 
97     Some(HashState {
98         key,
99         disps,
100         map: map.into_iter().map(|i| i.unwrap()).collect(),
101     })
102 }
103