1 //! `strength_reduce` implements integer division and modulo via "arithmetic strength reduction"
2 //!
3 //! Modern processors can do multiplication and shifts much faster than division, and "arithmetic strength reduction" is an algorithm to transform divisions into multiplications and shifts.
4 //! Compilers already perform this optimization for divisors that are known at compile time; this library enables this optimization for divisors that are only known at runtime.
5 //!
6 //! Benchmarking shows a 5-10x speedup or integer division and modulo operations.
7 //!
8 //! # Example:
9 //! ```
10 //! use strength_reduce::StrengthReducedU64;
11 //!
12 //! let mut my_array: Vec<u64> = (0..500).collect();
13 //! let divisor = 3;
14 //! let modulo = 14;
15 //!
16 //! // slow naive division and modulo
17 //! for element in &mut my_array {
18 //!     *element = (*element / divisor) % modulo;
19 //! }
20 //!
21 //! // fast strength-reduced division and modulo
22 //! let reduced_divisor = StrengthReducedU64::new(divisor);
23 //! let reduced_modulo = StrengthReducedU64::new(modulo);
24 //! for element in &mut my_array {
25 //!     *element = (*element / reduced_divisor) % reduced_modulo;
26 //! }
27 //! ```
28 //!
29 //! This library is intended for hot loops like the example above, where a division is repeated many times in a loop with the divisor remaining unchanged.
30 //! There is a setup cost associated with creating stength-reduced division instances, so using strength-reduced division for 1-2 divisions is not worth the setup cost.
31 //! The break-even point differs by use-case, but is typically low: Benchmarking has shown that takes 3 to 4 repeated divisions with the same StengthReduced## instance to be worth it.
32 //!
33 //! `strength_reduce` is `#![no_std]`
34 //!
35 //! The optimizations that this library provides are inherently dependent on architecture, compiler, and platform,
36 //! so test before you use.
37 #![no_std]
38 
39 #[cfg(test)]
40 extern crate num_bigint;
41 #[cfg(test)]
42 extern crate rand;
43 
44 use core::ops::{Div, Rem};
45 
46 mod long_division;
47 mod long_multiplication;
48 
49 /// Implements unsigned division and modulo via mutiplication and shifts.
50 ///
51 /// Creating a an instance of this struct is more expensive than a single division, but if the division is repeated,
52 /// this version will be several times faster than naive division.
53 #[derive(Clone, Copy, Debug)]
54 pub struct StrengthReducedU8 {
55     multiplier: u16,
56     divisor: u8,
57 }
58 impl StrengthReducedU8 {
59     /// Creates a new divisor instance.
60     ///
61     /// If possible, avoid calling new() from an inner loop: The intended usage is to create an instance of this struct outside the loop, and use it for divison and remainders inside the loop.
62     ///
63     /// # Panics:
64     ///
65     /// Panics if `divisor` is 0
66     #[inline]
new(divisor: u8) -> Self67     pub fn new(divisor: u8) -> Self {
68         assert!(divisor > 0);
69 
70         if divisor.is_power_of_two() {
71             Self{ multiplier: 0, divisor }
72         } else {
73             let divided = core::u16::MAX / (divisor as u16);
74             Self{ multiplier: divided + 1, divisor }
75         }
76     }
77 
78     /// Simultaneous truncated integer division and modulus.
79     /// Returns `(quotient, remainder)`.
80     #[inline]
div_rem(numerator: u8, denom: Self) -> (u8, u8)81     pub fn div_rem(numerator: u8, denom: Self) -> (u8, u8) {
82         let quotient = numerator / denom;
83         let remainder = numerator % denom;
84         (quotient, remainder)
85     }
86 
87     /// Retrieve the value used to create this struct
88     #[inline]
get(&self) -> u889     pub fn get(&self) -> u8 {
90         self.divisor
91     }
92 }
93 
94 impl Div<StrengthReducedU8> for u8 {
95     type Output = u8;
96 
97     #[inline]
div(self, rhs: StrengthReducedU8) -> Self::Output98     fn div(self, rhs: StrengthReducedU8) -> Self::Output {
99         if rhs.multiplier == 0 {
100             (self as u16 >> rhs.divisor.trailing_zeros()) as u8
101         } else {
102             let numerator = self as u16;
103             let multiplied_hi = numerator * (rhs.multiplier >> 8);
104             let multiplied_lo = (numerator * rhs.multiplier as u8 as u16) >> 8;
105 
106             ((multiplied_hi + multiplied_lo) >> 8) as u8
107         }
108     }
109 }
110 
111 impl Rem<StrengthReducedU8> for u8 {
112     type Output = u8;
113 
114     #[inline]
rem(self, rhs: StrengthReducedU8) -> Self::Output115     fn rem(self, rhs: StrengthReducedU8) -> Self::Output {
116         if rhs.multiplier == 0 {
117             self & (rhs.divisor - 1)
118         } else {
119             let product = rhs.multiplier.wrapping_mul(self as u16) as u32;
120             let divisor = rhs.divisor as u32;
121 
122             let shifted = (product * divisor) >> 16;
123             shifted as u8
124         }
125     }
126 }
127 
128 // small types prefer to do work in the intermediate type
129 macro_rules! strength_reduced_u16 {
130     ($struct_name:ident, $primitive_type:ident) => (
131         /// Implements unsigned division and modulo via mutiplication and shifts.
132         ///
133         /// Creating a an instance of this struct is more expensive than a single division, but if the division is repeated,
134         /// this version will be several times faster than naive division.
135         #[derive(Clone, Copy, Debug)]
136         pub struct $struct_name {
137             multiplier: u32,
138             divisor: $primitive_type,
139         }
140         impl $struct_name {
141             /// Creates a new divisor instance.
142             ///
143             /// If possible, avoid calling new() from an inner loop: The intended usage is to create an instance of this struct outside the loop, and use it for divison and remainders inside the loop.
144             ///
145             /// # Panics:
146             ///
147             /// Panics if `divisor` is 0
148             #[inline]
149             pub fn new(divisor: $primitive_type) -> Self {
150                 assert!(divisor > 0);
151 
152                 if divisor.is_power_of_two() {
153                     Self{ multiplier: 0, divisor }
154                 } else {
155                     let divided = core::u32::MAX / (divisor as u32);
156                     Self{ multiplier: divided + 1, divisor }
157                 }
158             }
159 
160             /// Simultaneous truncated integer division and modulus.
161             /// Returns `(quotient, remainder)`.
162             #[inline]
163             pub fn div_rem(numerator: $primitive_type, denom: Self) -> ($primitive_type, $primitive_type) {
164                 let quotient = numerator / denom;
165                 let remainder = numerator - quotient * denom.divisor;
166                 (quotient, remainder)
167             }
168 
169             /// Retrieve the value used to create this struct
170             #[inline]
171             pub fn get(&self) -> $primitive_type {
172                 self.divisor
173             }
174         }
175 
176         impl Div<$struct_name> for $primitive_type {
177             type Output = $primitive_type;
178 
179             #[inline]
180             fn div(self, rhs: $struct_name) -> Self::Output {
181                 if rhs.multiplier == 0 {
182                     self >> rhs.divisor.trailing_zeros()
183                 } else {
184                     let numerator = self as u32;
185                     let multiplied_hi = numerator * (rhs.multiplier >> 16);
186                     let multiplied_lo = (numerator * rhs.multiplier as u16 as u32) >> 16;
187 
188                     ((multiplied_hi + multiplied_lo) >> 16) as $primitive_type
189                 }
190             }
191         }
192 
193         impl Rem<$struct_name> for $primitive_type {
194             type Output = $primitive_type;
195 
196             #[inline]
197             fn rem(self, rhs: $struct_name) -> Self::Output {
198                 if rhs.multiplier == 0 {
199                     self & (rhs.divisor - 1)
200                 } else {
201                     let quotient = self / rhs;
202                     self - quotient * rhs.divisor
203                 }
204             }
205         }
206     )
207 }
208 
209 // small types prefer to do work in the intermediate type
210 macro_rules! strength_reduced_u32 {
211     ($struct_name:ident, $primitive_type:ident) => (
212         /// Implements unsigned division and modulo via mutiplication and shifts.
213         ///
214         /// Creating a an instance of this struct is more expensive than a single division, but if the division is repeated,
215         /// this version will be several times faster than naive division.
216         #[derive(Clone, Copy, Debug)]
217         pub struct $struct_name {
218             multiplier: u64,
219             divisor: $primitive_type,
220         }
221         impl $struct_name {
222             /// Creates a new divisor instance.
223             ///
224             /// If possible, avoid calling new() from an inner loop: The intended usage is to create an instance of this struct outside the loop, and use it for divison and remainders inside the loop.
225             ///
226             /// # Panics:
227             ///
228             /// Panics if `divisor` is 0
229             #[inline]
230             pub fn new(divisor: $primitive_type) -> Self {
231                 assert!(divisor > 0);
232 
233                 if divisor.is_power_of_two() {
234                     Self{ multiplier: 0, divisor }
235                 } else {
236                     let divided = core::u64::MAX / (divisor as u64);
237                     Self{ multiplier: divided + 1, divisor }
238                 }
239             }
240 
241             /// Simultaneous truncated integer division and modulus.
242             /// Returns `(quotient, remainder)`.
243             #[inline]
244             pub fn div_rem(numerator: $primitive_type, denom: Self) -> ($primitive_type, $primitive_type) {
245                 if denom.multiplier == 0 {
246                     (numerator >> denom.divisor.trailing_zeros(), numerator & (denom.divisor - 1))
247                 }
248                 else {
249                     let numerator64 = numerator as u64;
250                     let multiplied_hi = numerator64 * (denom.multiplier >> 32);
251                     let multiplied_lo = numerator64 * (denom.multiplier as u32 as u64) >> 32;
252 
253                     let quotient = ((multiplied_hi + multiplied_lo) >> 32) as $primitive_type;
254                     let remainder = numerator - quotient * denom.divisor;
255                     (quotient, remainder)
256                 }
257             }
258 
259             /// Retrieve the value used to create this struct
260             #[inline]
261             pub fn get(&self) -> $primitive_type {
262                 self.divisor
263             }
264         }
265 
266         impl Div<$struct_name> for $primitive_type {
267             type Output = $primitive_type;
268 
269             #[inline]
270             fn div(self, rhs: $struct_name) -> Self::Output {
271                 if rhs.multiplier == 0 {
272                     self >> rhs.divisor.trailing_zeros()
273                 } else {
274                     let numerator = self as u64;
275                     let multiplied_hi = numerator * (rhs.multiplier >> 32);
276                     let multiplied_lo = numerator * (rhs.multiplier as u32 as u64) >> 32;
277 
278                     ((multiplied_hi + multiplied_lo) >> 32) as $primitive_type
279                 }
280             }
281         }
282 
283         impl Rem<$struct_name> for $primitive_type {
284             type Output = $primitive_type;
285 
286             #[inline]
287             fn rem(self, rhs: $struct_name) -> Self::Output {
288                 if rhs.multiplier == 0 {
289                     self & (rhs.divisor - 1)
290                 } else {
291                     let product = rhs.multiplier.wrapping_mul(self as u64) as u128;
292                     let divisor = rhs.divisor as u128;
293 
294                     let shifted = (product * divisor) >> 64;
295                     shifted as $primitive_type
296                 }
297             }
298         }
299     )
300 }
301 
302 macro_rules! strength_reduced_u64 {
303     ($struct_name:ident, $primitive_type:ident) => (
304         /// Implements unsigned division and modulo via mutiplication and shifts.
305         ///
306         /// Creating a an instance of this struct is more expensive than a single division, but if the division is repeated,
307         /// this version will be several times faster than naive division.
308         #[derive(Clone, Copy, Debug)]
309         pub struct $struct_name {
310             multiplier: u128,
311             divisor: $primitive_type,
312         }
313         impl $struct_name {
314             /// Creates a new divisor instance.
315             ///
316             /// If possible, avoid calling new() from an inner loop: The intended usage is to create an instance of this struct outside the loop, and use it for divison and remainders inside the loop.
317             ///
318             /// # Panics:
319             ///
320             /// Panics if `divisor` is 0
321             #[inline]
322             pub fn new(divisor: $primitive_type) -> Self {
323                 assert!(divisor > 0);
324 
325                 if divisor.is_power_of_two() {
326                     Self{ multiplier: 0, divisor }
327                 } else {
328                     let quotient = long_division::divide_128_max_by_64(divisor as u64);
329                     Self{ multiplier: quotient + 1, divisor }
330                 }
331             }
332             /// Simultaneous truncated integer division and modulus.
333             /// Returns `(quotient, remainder)`.
334             #[inline]
335             pub fn div_rem(numerator: $primitive_type, denom: Self) -> ($primitive_type, $primitive_type) {
336                 if denom.multiplier == 0 {
337                     (numerator >> denom.divisor.trailing_zeros(), numerator & (denom.divisor - 1))
338                 }
339                 else {
340                     let numerator128 = numerator as u128;
341                     let multiplied_hi = numerator128 * (denom.multiplier >> 64);
342                     let multiplied_lo = numerator128 * (denom.multiplier as u64 as u128) >> 64;
343 
344                     let quotient = ((multiplied_hi + multiplied_lo) >> 64) as $primitive_type;
345                     let remainder = numerator - quotient * denom.divisor;
346                     (quotient, remainder)
347                 }
348             }
349 
350             /// Retrieve the value used to create this struct
351             #[inline]
352             pub fn get(&self) -> $primitive_type {
353                 self.divisor
354             }
355         }
356 
357         impl Div<$struct_name> for $primitive_type {
358             type Output = $primitive_type;
359 
360             #[inline]
361             fn div(self, rhs: $struct_name) -> Self::Output {
362                 if rhs.multiplier == 0 {
363                     self >> rhs.divisor.trailing_zeros()
364                 } else {
365                     let numerator = self as u128;
366                     let multiplied_hi = numerator * (rhs.multiplier >> 64);
367                     let multiplied_lo = numerator * (rhs.multiplier as u64 as u128) >> 64;
368 
369                     ((multiplied_hi + multiplied_lo) >> 64) as $primitive_type
370                 }
371             }
372         }
373 
374         impl Rem<$struct_name> for $primitive_type {
375             type Output = $primitive_type;
376 
377             #[inline]
378             fn rem(self, rhs: $struct_name) -> Self::Output {
379                 if rhs.multiplier == 0 {
380                     self & (rhs.divisor - 1)
381                 } else {
382                     let quotient = self / rhs;
383                     self - quotient * rhs.divisor
384                 }
385             }
386         }
387     )
388 }
389 
390 /// Implements unsigned division and modulo via mutiplication and shifts.
391 ///
392 /// Creating a an instance of this struct is more expensive than a single division, but if the division is repeated,
393 /// this version will be several times faster than naive division.
394 #[derive(Clone, Copy, Debug)]
395 pub struct StrengthReducedU128 {
396     multiplier_hi: u128,
397     multiplier_lo: u128,
398     divisor: u128,
399 }
400 impl StrengthReducedU128 {
401     /// Creates a new divisor instance.
402     ///
403     /// If possible, avoid calling new() from an inner loop: The intended usage is to create an instance of this struct outside the loop, and use it for divison and remainders inside the loop.
404     ///
405     /// # Panics:
406     ///
407     /// Panics if `divisor` is 0
408     #[inline]
new(divisor: u128) -> Self409     pub fn new(divisor: u128) -> Self {
410         assert!(divisor > 0);
411 
412         if divisor.is_power_of_two() {
413             Self{ multiplier_hi: 0, multiplier_lo: 0, divisor }
414         } else {
415             let (quotient_hi, quotient_lo) = long_division::divide_256_max_by_128(divisor);
416             let multiplier_lo = quotient_lo.wrapping_add(1);
417             let multiplier_hi = if multiplier_lo == 0 { quotient_hi + 1 } else { quotient_hi };
418             Self{ multiplier_hi, multiplier_lo, divisor }
419         }
420     }
421 
422     /// Simultaneous truncated integer division and modulus.
423     /// Returns `(quotient, remainder)`.
424     #[inline]
div_rem(numerator: u128, denom: Self) -> (u128, u128)425     pub fn div_rem(numerator: u128, denom: Self) -> (u128, u128) {
426         let quotient = numerator / denom;
427         let remainder = numerator - quotient * denom.divisor;
428         (quotient, remainder)
429     }
430 
431     /// Retrieve the value used to create this struct
432     #[inline]
get(&self) -> u128433     pub fn get(&self) -> u128 {
434         self.divisor
435     }
436 }
437 
438 impl Div<StrengthReducedU128> for u128 {
439     type Output = u128;
440 
441     #[inline]
div(self, rhs: StrengthReducedU128) -> Self::Output442     fn div(self, rhs: StrengthReducedU128) -> Self::Output {
443         if rhs.multiplier_hi == 0 {
444             self >> rhs.divisor.trailing_zeros()
445         } else {
446             long_multiplication::multiply_256_by_128_upperbits(rhs.multiplier_hi, rhs.multiplier_lo, self)
447         }
448     }
449 }
450 
451 impl Rem<StrengthReducedU128> for u128 {
452     type Output = u128;
453 
454     #[inline]
rem(self, rhs: StrengthReducedU128) -> Self::Output455     fn rem(self, rhs: StrengthReducedU128) -> Self::Output {
456         if rhs.multiplier_hi == 0 {
457             self & (rhs.divisor - 1)
458         } else {
459              let quotient = long_multiplication::multiply_256_by_128_upperbits(rhs.multiplier_hi, rhs.multiplier_lo, self);
460              self - quotient * rhs.divisor
461         }
462     }
463 }
464 
465 // We just hardcoded u8 and u128 since they will never be a usize. for the rest, we have macros, so we can reuse the same code for usize
466 strength_reduced_u16!(StrengthReducedU16, u16);
467 strength_reduced_u32!(StrengthReducedU32, u32);
468 strength_reduced_u64!(StrengthReducedU64, u64);
469 
470 // Our definition for usize will depend on how big usize is
471 #[cfg(target_pointer_width = "16")]
472 strength_reduced_u16!(StrengthReducedUsize, usize);
473 #[cfg(target_pointer_width = "32")]
474 strength_reduced_u32!(StrengthReducedUsize, usize);
475 #[cfg(target_pointer_width = "64")]
476 strength_reduced_u64!(StrengthReducedUsize, usize);
477 
478 #[cfg(test)]
479 mod unit_tests {
480     use super::*;
481 
482     macro_rules! reduction_test {
483         ($test_name:ident, $struct_name:ident, $primitive_type:ident) => (
484             #[test]
485             fn $test_name() {
486                 let max = core::$primitive_type::MAX;
487                 let divisors = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,max-1,max];
488                 let numerators = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20];
489 
490                 for &divisor in &divisors {
491                     let reduced_divisor = $struct_name::new(divisor);
492                     for &numerator in &numerators {
493                         let expected_div = numerator / divisor;
494                         let expected_rem = numerator % divisor;
495 
496                         let reduced_div = numerator / reduced_divisor;
497 
498                         assert_eq!(expected_div, reduced_div, "Divide failed with numerator: {}, divisor: {}", numerator, divisor);
499                         let reduced_rem = numerator % reduced_divisor;
500 
501                         let (reduced_combined_div, reduced_combined_rem) = $struct_name::div_rem(numerator, reduced_divisor);
502 
503 
504                         assert_eq!(expected_rem, reduced_rem, "Modulo failed with numerator: {}, divisor: {}", numerator, divisor);
505                         assert_eq!(expected_div, reduced_combined_div, "div_rem divide failed with numerator: {}, divisor: {}", numerator, divisor);
506                         assert_eq!(expected_rem, reduced_combined_rem, "div_rem modulo failed with numerator: {}, divisor: {}", numerator, divisor);
507                     }
508                 }
509             }
510         )
511     }
512 
513     reduction_test!(test_strength_reduced_u8, StrengthReducedU8, u8);
514     reduction_test!(test_strength_reduced_u16, StrengthReducedU16, u16);
515     reduction_test!(test_strength_reduced_u32, StrengthReducedU32, u32);
516     reduction_test!(test_strength_reduced_u64, StrengthReducedU64, u64);
517     reduction_test!(test_strength_reduced_usize, StrengthReducedUsize, usize);
518     reduction_test!(test_strength_reduced_u128, StrengthReducedU128, u128);
519 }
520