1 //! An index-accessed table implementation that avoids duplicate entries.
2 use std::collections::HashMap;
3 use std::hash::Hash;
4 use std::slice;
5 
6 /// Collect items into the `table` list, removing duplicates.
7 pub(crate) struct UniqueTable<'entries, T: Eq + Hash> {
8     table: Vec<&'entries T>,
9     map: HashMap<&'entries T, usize>,
10 }
11 
12 impl<'entries, T: Eq + Hash> UniqueTable<'entries, T> {
new() -> Self13     pub fn new() -> Self {
14         Self {
15             table: Vec::new(),
16             map: HashMap::new(),
17         }
18     }
19 
add(&mut self, entry: &'entries T) -> usize20     pub fn add(&mut self, entry: &'entries T) -> usize {
21         match self.map.get(&entry) {
22             None => {
23                 let i = self.table.len();
24                 self.table.push(entry);
25                 self.map.insert(entry, i);
26                 i
27             }
28             Some(&i) => i,
29         }
30     }
31 
len(&self) -> usize32     pub fn len(&self) -> usize {
33         self.table.len()
34     }
get(&self, index: usize) -> &T35     pub fn get(&self, index: usize) -> &T {
36         self.table[index]
37     }
iter(&self) -> slice::Iter<&'entries T>38     pub fn iter(&self) -> slice::Iter<&'entries T> {
39         self.table.iter()
40     }
41 }
42 
43 /// A table of sequences which tries to avoid common subsequences.
44 pub(crate) struct UniqueSeqTable<T: PartialEq + Clone> {
45     table: Vec<T>,
46 }
47 
48 impl<T: PartialEq + Clone> UniqueSeqTable<T> {
new() -> Self49     pub fn new() -> Self {
50         Self { table: Vec::new() }
51     }
add(&mut self, values: &[T]) -> usize52     pub fn add(&mut self, values: &[T]) -> usize {
53         if values.is_empty() {
54             return 0;
55         }
56         if let Some(offset) = find_subsequence(values, &self.table) {
57             offset
58         } else {
59             let table_len = self.table.len();
60 
61             // Try to put in common the last elements of the table if they're a prefix of the new
62             // sequence.
63             //
64             // We know there wasn't a full match, so the best prefix we can hope to find contains
65             // all the values but the last one.
66             let mut start_from = usize::min(table_len, values.len() - 1);
67             while start_from != 0 {
68                 // Loop invariant: start_from <= table_len, so table_len - start_from >= 0.
69                 if values[0..start_from] == self.table[table_len - start_from..table_len] {
70                     break;
71                 }
72                 start_from -= 1;
73             }
74 
75             self.table
76                 .extend(values[start_from..values.len()].iter().cloned());
77             table_len - start_from
78         }
79     }
len(&self) -> usize80     pub fn len(&self) -> usize {
81         self.table.len()
82     }
iter(&self) -> slice::Iter<T>83     pub fn iter(&self) -> slice::Iter<T> {
84         self.table.iter()
85     }
86 }
87 
88 /// Try to find the subsequence `sub` in the `whole` sequence. Returns None if
89 /// it's not been found, or Some(index) if it has been. Naive implementation
90 /// until proven we need something better.
find_subsequence<T: PartialEq>(sub: &[T], whole: &[T]) -> Option<usize>91 fn find_subsequence<T: PartialEq>(sub: &[T], whole: &[T]) -> Option<usize> {
92     assert!(!sub.is_empty());
93     // We want i + sub.len() <= whole.len(), i.e. i < whole.len() + 1 - sub.len().
94     if whole.len() < sub.len() {
95         return None;
96     }
97     let max = whole.len() - sub.len();
98     for i in 0..=max {
99         if whole[i..i + sub.len()] == sub[..] {
100             return Some(i);
101         }
102     }
103     None
104 }
105 
106 #[test]
test_find_subsequence()107 fn test_find_subsequence() {
108     assert_eq!(find_subsequence(&vec![1], &vec![4]), None);
109     assert_eq!(find_subsequence(&vec![1], &vec![1]), Some(0));
110     assert_eq!(find_subsequence(&vec![1, 2], &vec![1]), None);
111     assert_eq!(find_subsequence(&vec![1, 2], &vec![1, 2]), Some(0));
112     assert_eq!(find_subsequence(&vec![1, 2], &vec![1, 3]), None);
113     assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 2]), Some(1));
114     assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 3, 1]), None);
115     assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 3, 1, 2]), Some(3));
116     assert_eq!(
117         find_subsequence(&vec![1, 1, 3], &vec![1, 1, 1, 3, 3]),
118         Some(1)
119     );
120 }
121 
122 #[test]
test_optimal_add()123 fn test_optimal_add() {
124     let mut seq_table = UniqueSeqTable::new();
125     // [0, 1, 2, 3]
126     assert_eq!(seq_table.add(&vec![0, 1, 2, 3]), 0);
127     assert_eq!(seq_table.add(&vec![0, 1, 2, 3]), 0);
128     assert_eq!(seq_table.add(&vec![1, 2, 3]), 1);
129     assert_eq!(seq_table.add(&vec![2, 3]), 2);
130     assert_eq!(seq_table.len(), 4);
131     // [0, 1, 2, 3, 4]
132     assert_eq!(seq_table.add(&vec![2, 3, 4]), 2);
133     assert_eq!(seq_table.len(), 5);
134     // [0, 1, 2, 3, 4, 6, 5, 7]
135     assert_eq!(seq_table.add(&vec![4, 6, 5, 7]), 4);
136     assert_eq!(seq_table.len(), 8);
137     // [0, 1, 2, 3, 4, 6, 5, 7, 8, 2, 3, 4]
138     assert_eq!(seq_table.add(&vec![8, 2, 3, 4]), 8);
139     assert_eq!(seq_table.add(&vec![8]), 8);
140     assert_eq!(seq_table.len(), 12);
141 }
142