1 use std::cell::{Cell, RefCell};
2 use alloc::vec::{self, Vec};
3 
4 /// A trait to unify FnMut for GroupBy with the chunk key in IntoChunks
5 trait KeyFunction<A> {
6     type Key;
call_mut(&mut self, arg: A) -> Self::Key7     fn call_mut(&mut self, arg: A) -> Self::Key;
8 }
9 
10 impl<'a, A, K, F: ?Sized> KeyFunction<A> for F
11     where F: FnMut(A) -> K
12 {
13     type Key = K;
14     #[inline]
call_mut(&mut self, arg: A) -> Self::Key15     fn call_mut(&mut self, arg: A) -> Self::Key {
16         (*self)(arg)
17     }
18 }
19 
20 
21 /// ChunkIndex acts like the grouping key function for IntoChunks
22 #[derive(Debug)]
23 struct ChunkIndex {
24     size: usize,
25     index: usize,
26     key: usize,
27 }
28 
29 impl ChunkIndex {
30     #[inline(always)]
new(size: usize) -> Self31     fn new(size: usize) -> Self {
32         ChunkIndex {
33             size,
34             index: 0,
35             key: 0,
36         }
37     }
38 }
39 
40 impl<'a, A> KeyFunction<A> for ChunkIndex {
41     type Key = usize;
42     #[inline(always)]
call_mut(&mut self, _arg: A) -> Self::Key43     fn call_mut(&mut self, _arg: A) -> Self::Key {
44         if self.index == self.size {
45             self.key += 1;
46             self.index = 0;
47         }
48         self.index += 1;
49         self.key
50     }
51 }
52 
53 
54 struct GroupInner<K, I, F>
55     where I: Iterator
56 {
57     key: F,
58     iter: I,
59     current_key: Option<K>,
60     current_elt: Option<I::Item>,
61     /// flag set if iterator is exhausted
62     done: bool,
63     /// Index of group we are currently buffering or visiting
64     top_group: usize,
65     /// Least index for which we still have elements buffered
66     oldest_buffered_group: usize,
67     /// Group index for `buffer[0]` -- the slots
68     /// bottom_group..oldest_buffered_group are unused and will be erased when
69     /// that range is large enough.
70     bottom_group: usize,
71     /// Buffered groups, from `bottom_group` (index 0) to `top_group`.
72     buffer: Vec<vec::IntoIter<I::Item>>,
73     /// index of last group iter that was dropped, usize::MAX == none
74     dropped_group: usize,
75 }
76 
77 impl<K, I, F> GroupInner<K, I, F>
78     where I: Iterator,
79           F: for<'a> KeyFunction<&'a I::Item, Key=K>,
80           K: PartialEq,
81 {
82     /// `client`: Index of group that requests next element
83     #[inline(always)]
step(&mut self, client: usize) -> Option<I::Item>84     fn step(&mut self, client: usize) -> Option<I::Item> {
85         /*
86         println!("client={}, bottom_group={}, oldest_buffered_group={}, top_group={}, buffers=[{}]",
87                  client, self.bottom_group, self.oldest_buffered_group,
88                  self.top_group,
89                  self.buffer.iter().map(|elt| elt.len()).format(", "));
90         */
91         if client < self.oldest_buffered_group {
92             None
93         } else if client < self.top_group ||
94             (client == self.top_group &&
95              self.buffer.len() > self.top_group - self.bottom_group)
96         {
97             self.lookup_buffer(client)
98         } else if self.done {
99             None
100         } else if self.top_group == client {
101             self.step_current()
102         } else {
103             self.step_buffering(client)
104         }
105     }
106 
107     #[inline(never)]
lookup_buffer(&mut self, client: usize) -> Option<I::Item>108     fn lookup_buffer(&mut self, client: usize) -> Option<I::Item> {
109         // if `bufidx` doesn't exist in self.buffer, it might be empty
110         let bufidx = client - self.bottom_group;
111         if client < self.oldest_buffered_group {
112             return None;
113         }
114         let elt = self.buffer.get_mut(bufidx).and_then(|queue| queue.next());
115         if elt.is_none() && client == self.oldest_buffered_group {
116             // FIXME: VecDeque is unfortunately not zero allocation when empty,
117             // so we do this job manually.
118             // `bottom_group..oldest_buffered_group` is unused, and if it's large enough, erase it.
119             self.oldest_buffered_group += 1;
120             // skip forward further empty queues too
121             while self.buffer.get(self.oldest_buffered_group - self.bottom_group)
122                              .map_or(false, |buf| buf.len() == 0)
123             {
124                 self.oldest_buffered_group += 1;
125             }
126 
127             let nclear = self.oldest_buffered_group - self.bottom_group;
128             if nclear > 0 && nclear >= self.buffer.len() / 2 {
129                 let mut i = 0;
130                 self.buffer.retain(|buf| {
131                     i += 1;
132                     debug_assert!(buf.len() == 0 || i > nclear);
133                     i > nclear
134                 });
135                 self.bottom_group = self.oldest_buffered_group;
136             }
137         }
138         elt
139     }
140 
141     /// Take the next element from the iterator, and set the done
142     /// flag if exhausted. Must not be called after done.
143     #[inline(always)]
next_element(&mut self) -> Option<I::Item>144     fn next_element(&mut self) -> Option<I::Item> {
145         debug_assert!(!self.done);
146         match self.iter.next() {
147             None => { self.done = true; None }
148             otherwise => otherwise,
149         }
150     }
151 
152 
153     #[inline(never)]
step_buffering(&mut self, client: usize) -> Option<I::Item>154     fn step_buffering(&mut self, client: usize) -> Option<I::Item> {
155         // requested a later group -- walk through the current group up to
156         // the requested group index, and buffer the elements (unless
157         // the group is marked as dropped).
158         // Because the `Groups` iterator is always the first to request
159         // each group index, client is the next index efter top_group.
160         debug_assert!(self.top_group + 1 == client);
161         let mut group = Vec::new();
162 
163         if let Some(elt) = self.current_elt.take() {
164             if self.top_group != self.dropped_group {
165                 group.push(elt);
166             }
167         }
168         let mut first_elt = None; // first element of the next group
169 
170         while let Some(elt) = self.next_element() {
171             let key = self.key.call_mut(&elt);
172             match self.current_key.take() {
173                 None => {}
174                 Some(old_key) => if old_key != key {
175                     self.current_key = Some(key);
176                     first_elt = Some(elt);
177                     break;
178                 },
179             }
180             self.current_key = Some(key);
181             if self.top_group != self.dropped_group {
182                 group.push(elt);
183             }
184         }
185 
186         if self.top_group != self.dropped_group {
187             self.push_next_group(group);
188         }
189         if first_elt.is_some() {
190             self.top_group += 1;
191             debug_assert!(self.top_group == client);
192         }
193         first_elt
194     }
195 
push_next_group(&mut self, group: Vec<I::Item>)196     fn push_next_group(&mut self, group: Vec<I::Item>) {
197         // When we add a new buffered group, fill up slots between oldest_buffered_group and top_group
198         while self.top_group - self.bottom_group > self.buffer.len() {
199             if self.buffer.is_empty() {
200                 self.bottom_group += 1;
201                 self.oldest_buffered_group += 1;
202             } else {
203                 self.buffer.push(Vec::new().into_iter());
204             }
205         }
206         self.buffer.push(group.into_iter());
207         debug_assert!(self.top_group + 1 - self.bottom_group == self.buffer.len());
208     }
209 
210     /// This is the immediate case, where we use no buffering
211     #[inline]
step_current(&mut self) -> Option<I::Item>212     fn step_current(&mut self) -> Option<I::Item> {
213         debug_assert!(!self.done);
214         if let elt @ Some(..) = self.current_elt.take() {
215             return elt;
216         }
217         match self.next_element() {
218             None => None,
219             Some(elt) => {
220                 let key = self.key.call_mut(&elt);
221                 match self.current_key.take() {
222                     None => {}
223                     Some(old_key) => if old_key != key {
224                         self.current_key = Some(key);
225                         self.current_elt = Some(elt);
226                         self.top_group += 1;
227                         return None;
228                     },
229                 }
230                 self.current_key = Some(key);
231                 Some(elt)
232             }
233         }
234     }
235 
236     /// Request the just started groups' key.
237     ///
238     /// `client`: Index of group
239     ///
240     /// **Panics** if no group key is available.
group_key(&mut self, client: usize) -> K241     fn group_key(&mut self, client: usize) -> K {
242         // This can only be called after we have just returned the first
243         // element of a group.
244         // Perform this by simply buffering one more element, grabbing the
245         // next key.
246         debug_assert!(!self.done);
247         debug_assert!(client == self.top_group);
248         debug_assert!(self.current_key.is_some());
249         debug_assert!(self.current_elt.is_none());
250         let old_key = self.current_key.take().unwrap();
251         if let Some(elt) = self.next_element() {
252             let key = self.key.call_mut(&elt);
253             if old_key != key {
254                 self.top_group += 1;
255             }
256             self.current_key = Some(key);
257             self.current_elt = Some(elt);
258         }
259         old_key
260     }
261 }
262 
263 impl<K, I, F> GroupInner<K, I, F>
264     where I: Iterator,
265 {
266     /// Called when a group is dropped
drop_group(&mut self, client: usize)267     fn drop_group(&mut self, client: usize) {
268         // It's only useful to track the maximal index
269         if self.dropped_group == !0 || client > self.dropped_group {
270             self.dropped_group = client;
271         }
272     }
273 }
274 
275 /// `GroupBy` is the storage for the lazy grouping operation.
276 ///
277 /// If the groups are consumed in their original order, or if each
278 /// group is dropped without keeping it around, then `GroupBy` uses
279 /// no allocations. It needs allocations only if several group iterators
280 /// are alive at the same time.
281 ///
282 /// This type implements [`IntoIterator`] (it is **not** an iterator
283 /// itself), because the group iterators need to borrow from this
284 /// value. It should be stored in a local variable or temporary and
285 /// iterated.
286 ///
287 /// See [`.group_by()`](crate::Itertools::group_by) for more information.
288 #[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
289 pub struct GroupBy<K, I, F>
290     where I: Iterator,
291 {
292     inner: RefCell<GroupInner<K, I, F>>,
293     // the group iterator's current index. Keep this in the main value
294     // so that simultaneous iterators all use the same state.
295     index: Cell<usize>,
296 }
297 
298 /// Create a new
new<K, J, F>(iter: J, f: F) -> GroupBy<K, J::IntoIter, F> where J: IntoIterator, F: FnMut(&J::Item) -> K,299 pub fn new<K, J, F>(iter: J, f: F) -> GroupBy<K, J::IntoIter, F>
300     where J: IntoIterator,
301           F: FnMut(&J::Item) -> K,
302 {
303     GroupBy {
304         inner: RefCell::new(GroupInner {
305             key: f,
306             iter: iter.into_iter(),
307             current_key: None,
308             current_elt: None,
309             done: false,
310             top_group: 0,
311             oldest_buffered_group: 0,
312             bottom_group: 0,
313             buffer: Vec::new(),
314             dropped_group: !0,
315         }),
316         index: Cell::new(0),
317     }
318 }
319 
320 impl<K, I, F> GroupBy<K, I, F>
321     where I: Iterator,
322 {
323     /// `client`: Index of group that requests next element
step(&self, client: usize) -> Option<I::Item> where F: FnMut(&I::Item) -> K, K: PartialEq,324     fn step(&self, client: usize) -> Option<I::Item>
325         where F: FnMut(&I::Item) -> K,
326               K: PartialEq,
327     {
328         self.inner.borrow_mut().step(client)
329     }
330 
331     /// `client`: Index of group
drop_group(&self, client: usize)332     fn drop_group(&self, client: usize) {
333         self.inner.borrow_mut().drop_group(client)
334     }
335 }
336 
337 impl<'a, K, I, F> IntoIterator for &'a GroupBy<K, I, F>
338     where I: Iterator,
339           I::Item: 'a,
340           F: FnMut(&I::Item) -> K,
341           K: PartialEq
342 {
343     type Item = (K, Group<'a, K, I, F>);
344     type IntoIter = Groups<'a, K, I, F>;
345 
into_iter(self) -> Self::IntoIter346     fn into_iter(self) -> Self::IntoIter {
347         Groups { parent: self }
348     }
349 }
350 
351 
352 /// An iterator that yields the Group iterators.
353 ///
354 /// Iterator element type is `(K, Group)`:
355 /// the group's key `K` and the group's iterator.
356 ///
357 /// See [`.group_by()`](crate::Itertools::group_by) for more information.
358 #[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
359 pub struct Groups<'a, K: 'a, I: 'a, F: 'a>
360     where I: Iterator,
361           I::Item: 'a
362 {
363     parent: &'a GroupBy<K, I, F>,
364 }
365 
366 impl<'a, K, I, F> Iterator for Groups<'a, K, I, F>
367     where I: Iterator,
368           I::Item: 'a,
369           F: FnMut(&I::Item) -> K,
370           K: PartialEq
371 {
372     type Item = (K, Group<'a, K, I, F>);
373 
374     #[inline]
next(&mut self) -> Option<Self::Item>375     fn next(&mut self) -> Option<Self::Item> {
376         let index = self.parent.index.get();
377         self.parent.index.set(index + 1);
378         let inner = &mut *self.parent.inner.borrow_mut();
379         inner.step(index).map(|elt| {
380             let key = inner.group_key(index);
381             (key, Group {
382                 parent: self.parent,
383                 index,
384                 first: Some(elt),
385             })
386         })
387     }
388 }
389 
390 /// An iterator for the elements in a single group.
391 ///
392 /// Iterator element type is `I::Item`.
393 pub struct Group<'a, K: 'a, I: 'a, F: 'a>
394     where I: Iterator,
395           I::Item: 'a,
396 {
397     parent: &'a GroupBy<K, I, F>,
398     index: usize,
399     first: Option<I::Item>,
400 }
401 
402 impl<'a, K, I, F> Drop for Group<'a, K, I, F>
403     where I: Iterator,
404           I::Item: 'a,
405 {
drop(&mut self)406     fn drop(&mut self) {
407         self.parent.drop_group(self.index);
408     }
409 }
410 
411 impl<'a, K, I, F> Iterator for Group<'a, K, I, F>
412     where I: Iterator,
413           I::Item: 'a,
414           F: FnMut(&I::Item) -> K,
415           K: PartialEq,
416 {
417     type Item = I::Item;
418     #[inline]
next(&mut self) -> Option<Self::Item>419     fn next(&mut self) -> Option<Self::Item> {
420         if let elt @ Some(..) = self.first.take() {
421             return elt;
422         }
423         self.parent.step(self.index)
424     }
425 }
426 
427 ///// IntoChunks /////
428 
429 /// Create a new
new_chunks<J>(iter: J, size: usize) -> IntoChunks<J::IntoIter> where J: IntoIterator,430 pub fn new_chunks<J>(iter: J, size: usize) -> IntoChunks<J::IntoIter>
431     where J: IntoIterator,
432 {
433     IntoChunks {
434         inner: RefCell::new(GroupInner {
435             key: ChunkIndex::new(size),
436             iter: iter.into_iter(),
437             current_key: None,
438             current_elt: None,
439             done: false,
440             top_group: 0,
441             oldest_buffered_group: 0,
442             bottom_group: 0,
443             buffer: Vec::new(),
444             dropped_group: !0,
445         }),
446         index: Cell::new(0),
447     }
448 }
449 
450 
451 /// `ChunkLazy` is the storage for a lazy chunking operation.
452 ///
453 /// `IntoChunks` behaves just like `GroupBy`: it is iterable, and
454 /// it only buffers if several chunk iterators are alive at the same time.
455 ///
456 /// This type implements [`IntoIterator`] (it is **not** an iterator
457 /// itself), because the chunk iterators need to borrow from this
458 /// value. It should be stored in a local variable or temporary and
459 /// iterated.
460 ///
461 /// Iterator element type is `Chunk`, each chunk's iterator.
462 ///
463 /// See [`.chunks()`](crate::Itertools::chunks) for more information.
464 #[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
465 pub struct IntoChunks<I>
466     where I: Iterator,
467 {
468     inner: RefCell<GroupInner<usize, I, ChunkIndex>>,
469     // the chunk iterator's current index. Keep this in the main value
470     // so that simultaneous iterators all use the same state.
471     index: Cell<usize>,
472 }
473 
474 
475 impl<I> IntoChunks<I>
476     where I: Iterator,
477 {
478     /// `client`: Index of chunk that requests next element
step(&self, client: usize) -> Option<I::Item>479     fn step(&self, client: usize) -> Option<I::Item> {
480         self.inner.borrow_mut().step(client)
481     }
482 
483     /// `client`: Index of chunk
drop_group(&self, client: usize)484     fn drop_group(&self, client: usize) {
485         self.inner.borrow_mut().drop_group(client)
486     }
487 }
488 
489 impl<'a, I> IntoIterator for &'a IntoChunks<I>
490     where I: Iterator,
491           I::Item: 'a,
492 {
493     type Item = Chunk<'a, I>;
494     type IntoIter = Chunks<'a, I>;
495 
into_iter(self) -> Self::IntoIter496     fn into_iter(self) -> Self::IntoIter {
497         Chunks {
498             parent: self,
499         }
500     }
501 }
502 
503 
504 /// An iterator that yields the Chunk iterators.
505 ///
506 /// Iterator element type is `Chunk`.
507 ///
508 /// See [`.chunks()`](crate::Itertools::chunks) for more information.
509 #[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
510 pub struct Chunks<'a, I: 'a>
511     where I: Iterator,
512           I::Item: 'a,
513 {
514     parent: &'a IntoChunks<I>,
515 }
516 
517 impl<'a, I> Iterator for Chunks<'a, I>
518     where I: Iterator,
519           I::Item: 'a,
520 {
521     type Item = Chunk<'a, I>;
522 
523     #[inline]
next(&mut self) -> Option<Self::Item>524     fn next(&mut self) -> Option<Self::Item> {
525         let index = self.parent.index.get();
526         self.parent.index.set(index + 1);
527         let inner = &mut *self.parent.inner.borrow_mut();
528         inner.step(index).map(|elt| {
529             Chunk {
530                 parent: self.parent,
531                 index,
532                 first: Some(elt),
533             }
534         })
535     }
536 }
537 
538 /// An iterator for the elements in a single chunk.
539 ///
540 /// Iterator element type is `I::Item`.
541 pub struct Chunk<'a, I: 'a>
542     where I: Iterator,
543           I::Item: 'a,
544 {
545     parent: &'a IntoChunks<I>,
546     index: usize,
547     first: Option<I::Item>,
548 }
549 
550 impl<'a, I> Drop for Chunk<'a, I>
551     where I: Iterator,
552           I::Item: 'a,
553 {
drop(&mut self)554     fn drop(&mut self) {
555         self.parent.drop_group(self.index);
556     }
557 }
558 
559 impl<'a, I> Iterator for Chunk<'a, I>
560     where I: Iterator,
561           I::Item: 'a,
562 {
563     type Item = I::Item;
564     #[inline]
next(&mut self) -> Option<Self::Item>565     fn next(&mut self) -> Option<Self::Item> {
566         if let elt @ Some(..) = self.first.take() {
567             return elt;
568         }
569         self.parent.step(self.index)
570     }
571 }
572