1 use std::hash::Hash;
2 
3 mod private {
4     use std::collections::HashMap;
5     use std::hash::Hash;
6     use std::fmt;
7 
8     #[derive(Clone)]
9     #[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
10     pub struct DuplicatesBy<I: Iterator, Key, F> {
11         pub(crate) iter: I,
12         pub(crate) meta: Meta<Key, F>,
13     }
14 
15     impl<I, V, F> fmt::Debug for DuplicatesBy<I, V, F>
16     where
17         I: Iterator + fmt::Debug,
18         V: fmt::Debug + Hash + Eq,
19     {
20         debug_fmt_fields!(DuplicatesBy, iter, meta.used);
21     }
22 
23     impl<I: Iterator, Key: Eq + Hash, F> DuplicatesBy<I, Key, F> {
new(iter: I, key_method: F) -> Self24         pub(crate) fn new(iter: I, key_method: F) -> Self {
25             DuplicatesBy {
26                 iter,
27                 meta: Meta {
28                     used: HashMap::new(),
29                     pending: 0,
30                     key_method,
31                 },
32             }
33         }
34     }
35 
36     #[derive(Clone)]
37     pub struct Meta<Key, F> {
38         used: HashMap<Key, bool>,
39         pending: usize,
40         key_method: F,
41     }
42 
43     impl<Key, F> Meta<Key, F>
44     where
45         Key: Eq + Hash,
46     {
47         /// Takes an item and returns it back to the caller if it's the second time we see it.
48         /// Otherwise the item is consumed and None is returned
49         #[inline(always)]
filter<I>(&mut self, item: I) -> Option<I> where F: KeyMethod<Key, I>,50         fn filter<I>(&mut self, item: I) -> Option<I>
51         where
52             F: KeyMethod<Key, I>,
53         {
54             let kv = self.key_method.make(item);
55             match self.used.get_mut(kv.key_ref()) {
56                 None => {
57                     self.used.insert(kv.key(), false);
58                     self.pending += 1;
59                     None
60                 }
61                 Some(true) => None,
62                 Some(produced) => {
63                     *produced = true;
64                     self.pending -= 1;
65                     Some(kv.value())
66                 }
67             }
68         }
69     }
70 
71     impl<I, Key, F> Iterator for DuplicatesBy<I, Key, F>
72     where
73         I: Iterator,
74         Key: Eq + Hash,
75         F: KeyMethod<Key, I::Item>,
76     {
77         type Item = I::Item;
78 
next(&mut self) -> Option<Self::Item>79         fn next(&mut self) -> Option<Self::Item> {
80             let DuplicatesBy { iter, meta } = self;
81             iter.find_map(|v| meta.filter(v))
82         }
83 
84         #[inline]
size_hint(&self) -> (usize, Option<usize>)85         fn size_hint(&self) -> (usize, Option<usize>) {
86             let (_, hi) = self.iter.size_hint();
87             let hi = hi.map(|hi| {
88                 if hi <= self.meta.pending {
89                     // fewer or equally many iter-remaining elements than pending elements
90                     // => at most, each iter-remaining element is matched
91                     hi
92                 } else {
93                     // fewer pending elements than iter-remaining elements
94                     // => at most:
95                     //    * each pending element is matched
96                     //    * the other iter-remaining elements come in pairs
97                     self.meta.pending + (hi - self.meta.pending) / 2
98                 }
99             });
100             // The lower bound is always 0 since we might only get unique items from now on
101             (0, hi)
102         }
103     }
104 
105     impl<I, Key, F> DoubleEndedIterator for DuplicatesBy<I, Key, F>
106     where
107         I: DoubleEndedIterator,
108         Key: Eq + Hash,
109         F: KeyMethod<Key, I::Item>,
110     {
next_back(&mut self) -> Option<Self::Item>111         fn next_back(&mut self) -> Option<Self::Item> {
112             let DuplicatesBy { iter, meta } = self;
113             iter.rev().find_map(|v| meta.filter(v))
114         }
115     }
116 
117     /// A keying method for use with `DuplicatesBy`
118     pub trait KeyMethod<K, V> {
119         type Container: KeyXorValue<K, V>;
120 
make(&mut self, value: V) -> Self::Container121         fn make(&mut self, value: V) -> Self::Container;
122     }
123 
124     /// Apply the identity function to elements before checking them for equality.
125     #[derive(Debug)]
126     pub struct ById;
127     impl<V> KeyMethod<V, V> for ById {
128         type Container = JustValue<V>;
129 
make(&mut self, v: V) -> Self::Container130         fn make(&mut self, v: V) -> Self::Container {
131             JustValue(v)
132         }
133     }
134 
135     /// Apply a user-supplied function to elements before checking them for equality.
136     pub struct ByFn<F>(pub(crate) F);
137     impl<F> fmt::Debug for ByFn<F> {
138         debug_fmt_fields!(ByFn,);
139     }
140     impl<K, V, F> KeyMethod<K, V> for ByFn<F>
141     where
142         F: FnMut(&V) -> K,
143     {
144         type Container = KeyValue<K, V>;
145 
make(&mut self, v: V) -> Self::Container146         fn make(&mut self, v: V) -> Self::Container {
147             KeyValue((self.0)(&v), v)
148         }
149     }
150 
151     // Implementors of this trait can hold onto a key and a value but only give access to one of them
152     // at a time. This allows the key and the value to be the same value internally
153     pub trait KeyXorValue<K, V> {
key_ref(&self) -> &K154         fn key_ref(&self) -> &K;
key(self) -> K155         fn key(self) -> K;
value(self) -> V156         fn value(self) -> V;
157     }
158 
159     #[derive(Debug)]
160     pub struct KeyValue<K, V>(K, V);
161     impl<K, V> KeyXorValue<K, V> for KeyValue<K, V> {
key_ref(&self) -> &K162         fn key_ref(&self) -> &K {
163             &self.0
164         }
key(self) -> K165         fn key(self) -> K {
166             self.0
167         }
value(self) -> V168         fn value(self) -> V {
169             self.1
170         }
171     }
172 
173     #[derive(Debug)]
174     pub struct JustValue<V>(V);
175     impl<V> KeyXorValue<V, V> for JustValue<V> {
key_ref(&self) -> &V176         fn key_ref(&self) -> &V {
177             &self.0
178         }
key(self) -> V179         fn key(self) -> V {
180             self.0
181         }
value(self) -> V182         fn value(self) -> V {
183             self.0
184         }
185     }
186 }
187 
188 /// An iterator adapter to filter for duplicate elements.
189 ///
190 /// See [`.duplicates_by()`](crate::Itertools::duplicates_by) for more information.
191 #[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
192 pub type DuplicatesBy<I, V, F> = private::DuplicatesBy<I, V, private::ByFn<F>>;
193 
194 /// Create a new `DuplicatesBy` iterator.
duplicates_by<I, Key, F>(iter: I, f: F) -> DuplicatesBy<I, Key, F> where Key: Eq + Hash, F: FnMut(&I::Item) -> Key, I: Iterator,195 pub fn duplicates_by<I, Key, F>(iter: I, f: F) -> DuplicatesBy<I, Key, F>
196 where
197     Key: Eq + Hash,
198     F: FnMut(&I::Item) -> Key,
199     I: Iterator,
200 {
201     DuplicatesBy::new(iter, private::ByFn(f))
202 }
203 
204 /// An iterator adapter to filter out duplicate elements.
205 ///
206 /// See [`.duplicates()`](crate::Itertools::duplicates) for more information.
207 pub type Duplicates<I> = private::DuplicatesBy<I, <I as Iterator>::Item, private::ById>;
208 
209 /// Create a new `Duplicates` iterator.
duplicates<I>(iter: I) -> Duplicates<I> where I: Iterator, I::Item: Eq + Hash,210 pub fn duplicates<I>(iter: I) -> Duplicates<I>
211 where
212     I: Iterator,
213     I::Item: Eq + Hash,
214 {
215     Duplicates::new(iter, private::ById)
216 }
217 
218