1 /// Given an array of size width * height, representing a flattened 2D array,
2 /// transpose the rows and columns of that 2D array into the output
3 /// benchmarking shows that loop tiling isn't effective for small arrays (in the range of 50x50 or smaller)
transpose_small<T: Copy>(width: usize, height: usize, input: &[T], output: &mut [T])4 pub unsafe fn transpose_small<T: Copy>(width: usize, height: usize, input: &[T], output: &mut [T]) {
5     for x in 0..width {
6         for y in 0..height {
7             let input_index = x + y * width;
8             let output_index = y + x * height;
9 
10             *output.get_unchecked_mut(output_index) = *input.get_unchecked(input_index);
11         }
12     }
13 }
14 
15 #[allow(unused)]
workaround_transmute<T, U>(slice: &[T]) -> &[U]16 pub unsafe fn workaround_transmute<T, U>(slice: &[T]) -> &[U] {
17     let ptr = slice.as_ptr() as *const U;
18     let len = slice.len();
19     std::slice::from_raw_parts(ptr, len)
20 }
21 #[allow(unused)]
workaround_transmute_mut<T, U>(slice: &mut [T]) -> &mut [U]22 pub unsafe fn workaround_transmute_mut<T, U>(slice: &mut [T]) -> &mut [U] {
23     let ptr = slice.as_mut_ptr() as *mut U;
24     let len = slice.len();
25     std::slice::from_raw_parts_mut(ptr, len)
26 }
27 
28 #[derive(Copy, Clone)]
29 pub struct RawSlice<T> {
30     ptr: *const T,
31     slice_len: usize,
32 }
33 impl<T> RawSlice<T> {
34     #[inline(always)]
new(slice: &[T]) -> Self35     pub fn new(slice: &[T]) -> Self {
36         Self {
37             ptr: slice.as_ptr(),
38             slice_len: slice.len(),
39         }
40     }
41     #[allow(unused)]
42     #[inline(always)]
new_transmuted<U>(slice: &[U]) -> Self43     pub unsafe fn new_transmuted<U>(slice: &[U]) -> Self {
44         Self {
45             ptr: slice.as_ptr() as *const T,
46             slice_len: slice.len(),
47         }
48     }
49     #[allow(unused)]
50     #[inline(always)]
as_ptr(&self) -> *const T51     pub fn as_ptr(&self) -> *const T {
52         self.ptr
53     }
54     #[allow(unused)]
55     #[inline(always)]
len(&self) -> usize56     pub fn len(&self) -> usize {
57         self.slice_len
58     }
59 }
60 impl<T: Copy> RawSlice<T> {
61     #[inline(always)]
load(&self, index: usize) -> T62     pub unsafe fn load(&self, index: usize) -> T {
63         debug_assert!(index < self.slice_len);
64         *self.ptr.add(index)
65     }
66 }
67 
68 /// A RawSliceMut is a normal mutable slice, but aliasable. Its functionality is severely limited.
69 #[derive(Copy, Clone)]
70 pub struct RawSliceMut<T> {
71     ptr: *mut T,
72     slice_len: usize,
73 }
74 impl<T> RawSliceMut<T> {
75     #[inline(always)]
new(slice: &mut [T]) -> Self76     pub fn new(slice: &mut [T]) -> Self {
77         Self {
78             ptr: slice.as_mut_ptr(),
79             slice_len: slice.len(),
80         }
81     }
82     #[allow(unused)]
83     #[inline(always)]
new_transmuted<U>(slice: &mut [U]) -> Self84     pub unsafe fn new_transmuted<U>(slice: &mut [U]) -> Self {
85         Self {
86             ptr: slice.as_mut_ptr() as *mut T,
87             slice_len: slice.len(),
88         }
89     }
90     #[allow(unused)]
91     #[inline(always)]
as_mut_ptr(&self) -> *mut T92     pub fn as_mut_ptr(&self) -> *mut T {
93         self.ptr
94     }
95     #[allow(unused)]
96     #[inline(always)]
len(&self) -> usize97     pub fn len(&self) -> usize {
98         self.slice_len
99     }
100     #[inline(always)]
store(&self, value: T, index: usize)101     pub unsafe fn store(&self, value: T, index: usize) {
102         debug_assert!(index < self.slice_len);
103         *self.ptr.add(index) = value;
104     }
105 }
106 
107 #[cfg(test)]
108 mod unit_tests {
109     use super::*;
110     use crate::test_utils::random_signal;
111     use num_complex::Complex;
112     use num_traits::Zero;
113 
114     #[test]
test_transpose()115     fn test_transpose() {
116         let sizes: Vec<usize> = (1..16).collect();
117 
118         for &width in &sizes {
119             for &height in &sizes {
120                 let len = width * height;
121 
122                 let input: Vec<Complex<f32>> = random_signal(len);
123                 let mut output = vec![Zero::zero(); len];
124 
125                 unsafe { transpose_small(width, height, &input, &mut output) };
126 
127                 for x in 0..width {
128                     for y in 0..height {
129                         assert_eq!(
130                             input[x + y * width],
131                             output[y + x * height],
132                             "x = {}, y = {}",
133                             x,
134                             y
135                         );
136                     }
137                 }
138             }
139         }
140     }
141 }
142 
143 // Loop over exact chunks of the provided buffer. Very similar in semantics to ChunksExactMut, but generates smaller code and requires no modulo operations
144 // Returns Ok() if every element ended up in a chunk, Err() if there was a remainder
iter_chunks<T>( mut buffer: &mut [T], chunk_size: usize, mut chunk_fn: impl FnMut(&mut [T]), ) -> Result<(), ()>145 pub fn iter_chunks<T>(
146     mut buffer: &mut [T],
147     chunk_size: usize,
148     mut chunk_fn: impl FnMut(&mut [T]),
149 ) -> Result<(), ()> {
150     // Loop over the buffer, splicing off chunk_size at a time, and calling chunk_fn on each
151     while buffer.len() >= chunk_size {
152         let (head, tail) = buffer.split_at_mut(chunk_size);
153         buffer = tail;
154 
155         chunk_fn(head);
156     }
157 
158     // We have a remainder if there's data still in the buffer -- in which case we want to indicate to the caller that there was an unwanted remainder
159     if buffer.len() == 0 {
160         Ok(())
161     } else {
162         Err(())
163     }
164 }
165 
166 // Loop over exact zipped chunks of the 2 provided buffers. Very similar in semantics to ChunksExactMut.zip(ChunksExactMut), but generates smaller code and requires no modulo operations
167 // Returns Ok() if every element of both buffers ended up in a chunk, Err() if there was a remainder
iter_chunks_zipped<T>( mut buffer1: &mut [T], mut buffer2: &mut [T], chunk_size: usize, mut chunk_fn: impl FnMut(&mut [T], &mut [T]), ) -> Result<(), ()>168 pub fn iter_chunks_zipped<T>(
169     mut buffer1: &mut [T],
170     mut buffer2: &mut [T],
171     chunk_size: usize,
172     mut chunk_fn: impl FnMut(&mut [T], &mut [T]),
173 ) -> Result<(), ()> {
174     // If the two buffers aren't the same size, record the fact that they're different, then snip them to be the same size
175     let uneven = if buffer1.len() > buffer2.len() {
176         buffer1 = &mut buffer1[..buffer2.len()];
177         true
178     } else if buffer2.len() < buffer1.len() {
179         buffer2 = &mut buffer2[..buffer1.len()];
180         true
181     } else {
182         false
183     };
184 
185     // Now that we know the two slices are the same length, loop over each one, splicing off chunk_size at a time, and calling chunk_fn on each
186     while buffer1.len() >= chunk_size && buffer2.len() >= chunk_size {
187         let (head1, tail1) = buffer1.split_at_mut(chunk_size);
188         buffer1 = tail1;
189 
190         let (head2, tail2) = buffer2.split_at_mut(chunk_size);
191         buffer2 = tail2;
192 
193         chunk_fn(head1, head2);
194     }
195 
196     // We have a remainder if the 2 chunks were uneven to start with, or if there's still data in the buffers -- in which case we want to indicate to the caller that there was an unwanted remainder
197     if !uneven && buffer1.len() == 0 {
198         Ok(())
199     } else {
200         Err(())
201     }
202 }
203