1 /// Given an array of size width * height, representing a flattened 2D array,
2 /// transpose the rows and columns of that 2D array into the output
3 /// benchmarking shows that loop tiling isn't effective for small arrays (in the range of 50x50 or smaller)
transpose_small<T: Copy>(width: usize, height: usize, input: &[T], output: &mut [T])4 pub unsafe fn transpose_small<T: Copy>(width: usize, height: usize, input: &[T], output: &mut [T]) {
5 for x in 0..width {
6 for y in 0..height {
7 let input_index = x + y * width;
8 let output_index = y + x * height;
9
10 *output.get_unchecked_mut(output_index) = *input.get_unchecked(input_index);
11 }
12 }
13 }
14
15 #[allow(unused)]
workaround_transmute<T, U>(slice: &[T]) -> &[U]16 pub unsafe fn workaround_transmute<T, U>(slice: &[T]) -> &[U] {
17 let ptr = slice.as_ptr() as *const U;
18 let len = slice.len();
19 std::slice::from_raw_parts(ptr, len)
20 }
21 #[allow(unused)]
workaround_transmute_mut<T, U>(slice: &mut [T]) -> &mut [U]22 pub unsafe fn workaround_transmute_mut<T, U>(slice: &mut [T]) -> &mut [U] {
23 let ptr = slice.as_mut_ptr() as *mut U;
24 let len = slice.len();
25 std::slice::from_raw_parts_mut(ptr, len)
26 }
27
28 #[derive(Copy, Clone)]
29 pub struct RawSlice<T> {
30 ptr: *const T,
31 slice_len: usize,
32 }
33 impl<T> RawSlice<T> {
34 #[inline(always)]
new(slice: &[T]) -> Self35 pub fn new(slice: &[T]) -> Self {
36 Self {
37 ptr: slice.as_ptr(),
38 slice_len: slice.len(),
39 }
40 }
41 #[allow(unused)]
42 #[inline(always)]
new_transmuted<U>(slice: &[U]) -> Self43 pub unsafe fn new_transmuted<U>(slice: &[U]) -> Self {
44 Self {
45 ptr: slice.as_ptr() as *const T,
46 slice_len: slice.len(),
47 }
48 }
49 #[allow(unused)]
50 #[inline(always)]
as_ptr(&self) -> *const T51 pub fn as_ptr(&self) -> *const T {
52 self.ptr
53 }
54 #[allow(unused)]
55 #[inline(always)]
len(&self) -> usize56 pub fn len(&self) -> usize {
57 self.slice_len
58 }
59 }
60 impl<T: Copy> RawSlice<T> {
61 #[inline(always)]
load(&self, index: usize) -> T62 pub unsafe fn load(&self, index: usize) -> T {
63 debug_assert!(index < self.slice_len);
64 *self.ptr.add(index)
65 }
66 }
67
68 /// A RawSliceMut is a normal mutable slice, but aliasable. Its functionality is severely limited.
69 #[derive(Copy, Clone)]
70 pub struct RawSliceMut<T> {
71 ptr: *mut T,
72 slice_len: usize,
73 }
74 impl<T> RawSliceMut<T> {
75 #[inline(always)]
new(slice: &mut [T]) -> Self76 pub fn new(slice: &mut [T]) -> Self {
77 Self {
78 ptr: slice.as_mut_ptr(),
79 slice_len: slice.len(),
80 }
81 }
82 #[allow(unused)]
83 #[inline(always)]
new_transmuted<U>(slice: &mut [U]) -> Self84 pub unsafe fn new_transmuted<U>(slice: &mut [U]) -> Self {
85 Self {
86 ptr: slice.as_mut_ptr() as *mut T,
87 slice_len: slice.len(),
88 }
89 }
90 #[allow(unused)]
91 #[inline(always)]
as_mut_ptr(&self) -> *mut T92 pub fn as_mut_ptr(&self) -> *mut T {
93 self.ptr
94 }
95 #[allow(unused)]
96 #[inline(always)]
len(&self) -> usize97 pub fn len(&self) -> usize {
98 self.slice_len
99 }
100 #[inline(always)]
store(&self, value: T, index: usize)101 pub unsafe fn store(&self, value: T, index: usize) {
102 debug_assert!(index < self.slice_len);
103 *self.ptr.add(index) = value;
104 }
105 }
106
107 #[cfg(test)]
108 mod unit_tests {
109 use super::*;
110 use crate::test_utils::random_signal;
111 use num_complex::Complex;
112 use num_traits::Zero;
113
114 #[test]
test_transpose()115 fn test_transpose() {
116 let sizes: Vec<usize> = (1..16).collect();
117
118 for &width in &sizes {
119 for &height in &sizes {
120 let len = width * height;
121
122 let input: Vec<Complex<f32>> = random_signal(len);
123 let mut output = vec![Zero::zero(); len];
124
125 unsafe { transpose_small(width, height, &input, &mut output) };
126
127 for x in 0..width {
128 for y in 0..height {
129 assert_eq!(
130 input[x + y * width],
131 output[y + x * height],
132 "x = {}, y = {}",
133 x,
134 y
135 );
136 }
137 }
138 }
139 }
140 }
141 }
142
143 // Loop over exact chunks of the provided buffer. Very similar in semantics to ChunksExactMut, but generates smaller code and requires no modulo operations
144 // Returns Ok() if every element ended up in a chunk, Err() if there was a remainder
iter_chunks<T>( mut buffer: &mut [T], chunk_size: usize, mut chunk_fn: impl FnMut(&mut [T]), ) -> Result<(), ()>145 pub fn iter_chunks<T>(
146 mut buffer: &mut [T],
147 chunk_size: usize,
148 mut chunk_fn: impl FnMut(&mut [T]),
149 ) -> Result<(), ()> {
150 // Loop over the buffer, splicing off chunk_size at a time, and calling chunk_fn on each
151 while buffer.len() >= chunk_size {
152 let (head, tail) = buffer.split_at_mut(chunk_size);
153 buffer = tail;
154
155 chunk_fn(head);
156 }
157
158 // We have a remainder if there's data still in the buffer -- in which case we want to indicate to the caller that there was an unwanted remainder
159 if buffer.len() == 0 {
160 Ok(())
161 } else {
162 Err(())
163 }
164 }
165
166 // Loop over exact zipped chunks of the 2 provided buffers. Very similar in semantics to ChunksExactMut.zip(ChunksExactMut), but generates smaller code and requires no modulo operations
167 // Returns Ok() if every element of both buffers ended up in a chunk, Err() if there was a remainder
iter_chunks_zipped<T>( mut buffer1: &mut [T], mut buffer2: &mut [T], chunk_size: usize, mut chunk_fn: impl FnMut(&mut [T], &mut [T]), ) -> Result<(), ()>168 pub fn iter_chunks_zipped<T>(
169 mut buffer1: &mut [T],
170 mut buffer2: &mut [T],
171 chunk_size: usize,
172 mut chunk_fn: impl FnMut(&mut [T], &mut [T]),
173 ) -> Result<(), ()> {
174 // If the two buffers aren't the same size, record the fact that they're different, then snip them to be the same size
175 let uneven = if buffer1.len() > buffer2.len() {
176 buffer1 = &mut buffer1[..buffer2.len()];
177 true
178 } else if buffer2.len() < buffer1.len() {
179 buffer2 = &mut buffer2[..buffer1.len()];
180 true
181 } else {
182 false
183 };
184
185 // Now that we know the two slices are the same length, loop over each one, splicing off chunk_size at a time, and calling chunk_fn on each
186 while buffer1.len() >= chunk_size && buffer2.len() >= chunk_size {
187 let (head1, tail1) = buffer1.split_at_mut(chunk_size);
188 buffer1 = tail1;
189
190 let (head2, tail2) = buffer2.split_at_mut(chunk_size);
191 buffer2 = tail2;
192
193 chunk_fn(head1, head2);
194 }
195
196 // We have a remainder if the 2 chunks were uneven to start with, or if there's still data in the buffers -- in which case we want to indicate to the caller that there was an unwanted remainder
197 if !uneven && buffer1.len() == 0 {
198 Ok(())
199 } else {
200 Err(())
201 }
202 }
203