1 //! A variant of `SortedMap` that preserves insertion order.
2 
3 use std::borrow::Borrow;
4 use std::hash::{Hash, Hasher};
5 use std::iter::FromIterator;
6 
7 use crate::stable_hasher::{HashStable, StableHasher};
8 use rustc_index::vec::{Idx, IndexVec};
9 
/// An indexed multi-map that preserves insertion order while permitting both *O*(log *n*) lookup of
/// an item by key and *O*(1) lookup by index.
///
/// This data structure is a hybrid of an [`IndexVec`] and a [`SortedMap`]. Like `IndexVec`,
/// `SortedIndexMultiMap` assigns a typed index to each item while preserving insertion order.
/// Like `SortedMap`, `SortedIndexMultiMap` has efficient lookup of items by key. However, this
/// is accomplished by sorting an array of item indices instead of the items themselves.
///
/// Unlike `SortedMap`, this data structure can hold multiple equivalent items at once, so the
/// `get_by_key` method and its variants return an iterator instead of an `Option`. Equivalent
/// items will be yielded in insertion order. The run of equivalent items is currently located
/// with a linear scan, so lookup by key is only *O*(log *n*) when duplicate keys are rare.
///
/// Unlike a general-purpose map like `BTreeMap` or `HashMap`, `SortedMap` and
/// `SortedIndexMultiMap` require *O*(*n*) time to insert a single item. This is because we may need
/// to insert into the middle of the sorted array. Users should avoid mutating this data structure
/// in-place; build it in one go with `new` or the `FromIterator` implementation instead.
///
/// [`SortedMap`]: super::SortedMap
#[derive(Clone, Debug)]
pub struct SortedIndexMultiMap<I: Idx, K, V> {
    /// The elements of the map in insertion order.
    items: IndexVec<I, (K, V)>,

    /// Indices of the items in the map, sorted by the item's key.
    ///
    /// Holds one entry per element of `items`. Entries whose keys compare equal keep their
    /// relative insertion order, because the sort that builds this array is stable.
    idx_sorted_by_item_key: Vec<I>,
}
36 
37 impl<I: Idx, K: Ord, V> SortedIndexMultiMap<I, K, V> {
new() -> Self38     pub fn new() -> Self {
39         SortedIndexMultiMap { items: IndexVec::new(), idx_sorted_by_item_key: Vec::new() }
40     }
41 
len(&self) -> usize42     pub fn len(&self) -> usize {
43         self.items.len()
44     }
45 
is_empty(&self) -> bool46     pub fn is_empty(&self) -> bool {
47         self.items.is_empty()
48     }
49 
50     /// Returns an iterator over the items in the map in insertion order.
into_iter(self) -> impl DoubleEndedIterator<Item = (K, V)>51     pub fn into_iter(self) -> impl DoubleEndedIterator<Item = (K, V)> {
52         self.items.into_iter()
53     }
54 
55     /// Returns an iterator over the items in the map in insertion order along with their indices.
into_iter_enumerated(self) -> impl DoubleEndedIterator<Item = (I, (K, V))>56     pub fn into_iter_enumerated(self) -> impl DoubleEndedIterator<Item = (I, (K, V))> {
57         self.items.into_iter_enumerated()
58     }
59 
60     /// Returns an iterator over the items in the map in insertion order.
iter(&self) -> impl '_ + DoubleEndedIterator<Item = (&K, &V)>61     pub fn iter(&self) -> impl '_ + DoubleEndedIterator<Item = (&K, &V)> {
62         self.items.iter().map(|(ref k, ref v)| (k, v))
63     }
64 
65     /// Returns an iterator over the items in the map in insertion order along with their indices.
iter_enumerated(&self) -> impl '_ + DoubleEndedIterator<Item = (I, (&K, &V))>66     pub fn iter_enumerated(&self) -> impl '_ + DoubleEndedIterator<Item = (I, (&K, &V))> {
67         self.items.iter_enumerated().map(|(i, (ref k, ref v))| (i, (k, v)))
68     }
69 
70     /// Returns the item in the map with the given index.
get(&self, idx: I) -> Option<&(K, V)>71     pub fn get(&self, idx: I) -> Option<&(K, V)> {
72         self.items.get(idx)
73     }
74 
75     /// Returns an iterator over the items in the map that are equal to `key`.
76     ///
77     /// If there are multiple items that are equivalent to `key`, they will be yielded in
78     /// insertion order.
get_by_key<Q: 'a>(&'a self, key: &Q) -> impl 'a + Iterator<Item = &'a V> where Q: Ord + ?Sized, K: Borrow<Q>,79     pub fn get_by_key<Q: 'a>(&'a self, key: &Q) -> impl 'a + Iterator<Item = &'a V>
80     where
81         Q: Ord + ?Sized,
82         K: Borrow<Q>,
83     {
84         self.get_by_key_enumerated(key).map(|(_, v)| v)
85     }
86 
87     /// Returns an iterator over the items in the map that are equal to `key` along with their
88     /// indices.
89     ///
90     /// If there are multiple items that are equivalent to `key`, they will be yielded in
91     /// insertion order.
get_by_key_enumerated<Q>(&self, key: &Q) -> impl '_ + Iterator<Item = (I, &V)> where Q: Ord + ?Sized, K: Borrow<Q>,92     pub fn get_by_key_enumerated<Q>(&self, key: &Q) -> impl '_ + Iterator<Item = (I, &V)>
93     where
94         Q: Ord + ?Sized,
95         K: Borrow<Q>,
96     {
97         // FIXME: This should be in the standard library as `equal_range`. See rust-lang/rfcs#2184.
98         match self.binary_search_idx(key) {
99             Err(_) => self.idxs_to_items_enumerated(&[]),
100 
101             Ok(idx) => {
102                 let start = self.find_lower_bound(key, idx);
103                 let end = self.find_upper_bound(key, idx);
104                 self.idxs_to_items_enumerated(&self.idx_sorted_by_item_key[start..end])
105             }
106         }
107     }
108 
binary_search_idx<Q>(&self, key: &Q) -> Result<usize, usize> where Q: Ord + ?Sized, K: Borrow<Q>,109     fn binary_search_idx<Q>(&self, key: &Q) -> Result<usize, usize>
110     where
111         Q: Ord + ?Sized,
112         K: Borrow<Q>,
113     {
114         self.idx_sorted_by_item_key.binary_search_by(|&idx| self.items[idx].0.borrow().cmp(key))
115     }
116 
117     /// Returns the index into the `idx_sorted_by_item_key` array of the first item equal to
118     /// `key`.
119     ///
120     /// `initial` must be an index into that same array for an item that is equal to `key`.
find_lower_bound<Q>(&self, key: &Q, initial: usize) -> usize where Q: Ord + ?Sized, K: Borrow<Q>,121     fn find_lower_bound<Q>(&self, key: &Q, initial: usize) -> usize
122     where
123         Q: Ord + ?Sized,
124         K: Borrow<Q>,
125     {
126         debug_assert!(self.items[self.idx_sorted_by_item_key[initial]].0.borrow() == key);
127 
128         // FIXME: At present, this uses linear search, meaning lookup is only `O(log n)` if duplicate
129         // entries are rare. It would be better to start with a linear search for the common case but
130         // fall back to an exponential search if many duplicates are found. This applies to
131         // `upper_bound` as well.
132         let mut start = initial;
133         while start != 0 && self.items[self.idx_sorted_by_item_key[start - 1]].0.borrow() == key {
134             start -= 1;
135         }
136 
137         start
138     }
139 
140     /// Returns the index into the `idx_sorted_by_item_key` array of the first item greater than
141     /// `key`, or `self.len()` if no such item exists.
142     ///
143     /// `initial` must be an index into that same array for an item that is equal to `key`.
find_upper_bound<Q>(&self, key: &Q, initial: usize) -> usize where Q: Ord + ?Sized, K: Borrow<Q>,144     fn find_upper_bound<Q>(&self, key: &Q, initial: usize) -> usize
145     where
146         Q: Ord + ?Sized,
147         K: Borrow<Q>,
148     {
149         debug_assert!(self.items[self.idx_sorted_by_item_key[initial]].0.borrow() == key);
150 
151         // See the FIXME for `find_lower_bound`.
152         let mut end = initial + 1;
153         let len = self.items.len();
154         while end < len && self.items[self.idx_sorted_by_item_key[end]].0.borrow() == key {
155             end += 1;
156         }
157 
158         end
159     }
160 
idxs_to_items_enumerated(&'a self, idxs: &'a [I]) -> impl 'a + Iterator<Item = (I, &'a V)>161     fn idxs_to_items_enumerated(&'a self, idxs: &'a [I]) -> impl 'a + Iterator<Item = (I, &'a V)> {
162         idxs.iter().map(move |&idx| (idx, &self.items[idx].1))
163     }
164 }
165 
166 impl<I: Idx, K: Eq, V: Eq> Eq for SortedIndexMultiMap<I, K, V> {}
167 impl<I: Idx, K: PartialEq, V: PartialEq> PartialEq for SortedIndexMultiMap<I, K, V> {
eq(&self, other: &Self) -> bool168     fn eq(&self, other: &Self) -> bool {
169         // No need to compare the sorted index. If the items are the same, the index will be too.
170         self.items == other.items
171     }
172 }
173 
174 impl<I: Idx, K, V> Hash for SortedIndexMultiMap<I, K, V>
175 where
176     K: Hash,
177     V: Hash,
178 {
hash<H: Hasher>(&self, hasher: &mut H)179     fn hash<H: Hasher>(&self, hasher: &mut H) {
180         self.items.hash(hasher)
181     }
182 }
183 impl<I: Idx, K, V, C> HashStable<C> for SortedIndexMultiMap<I, K, V>
184 where
185     K: HashStable<C>,
186     V: HashStable<C>,
187 {
hash_stable(&self, ctx: &mut C, hasher: &mut StableHasher)188     fn hash_stable(&self, ctx: &mut C, hasher: &mut StableHasher) {
189         self.items.hash_stable(ctx, hasher)
190     }
191 }
192 
193 impl<I: Idx, K: Ord, V> FromIterator<(K, V)> for SortedIndexMultiMap<I, K, V> {
from_iter<J>(iter: J) -> Self where J: IntoIterator<Item = (K, V)>,194     fn from_iter<J>(iter: J) -> Self
195     where
196         J: IntoIterator<Item = (K, V)>,
197     {
198         let items = IndexVec::from_iter(iter);
199         let mut idx_sorted_by_item_key: Vec<_> = items.indices().collect();
200 
201         // `sort_by_key` is stable, so insertion order is preserved for duplicate items.
202         idx_sorted_by_item_key.sort_by_key(|&idx| &items[idx].0);
203 
204         SortedIndexMultiMap { items, idx_sorted_by_item_key }
205     }
206 }
207 
208 impl<I: Idx, K, V> std::ops::Index<I> for SortedIndexMultiMap<I, K, V> {
209     type Output = V;
210 
index(&self, idx: I) -> &Self::Output211     fn index(&self, idx: I) -> &Self::Output {
212         &self.items[idx].1
213     }
214 }
215 
// NOTE(review): the predicate below is spelled `tests`, but the test harness sets `test`. As
// written, this module is never compiled unless the build passes `--cfg tests` explicitly, so
// any tests it declares do not run. This looks like a typo for `#[cfg(test)]` -- TODO confirm
// that a `tests` module file exists for this module before correcting the spelling; if it does
// not, remove this declaration instead.
#[cfg(tests)]
mod tests;
218