1 //! `strength_reduce` implements integer division and modulo via "arithmetic strength reduction" 2 //! 3 //! Modern processors can do multiplication and shifts much faster than division, and "arithmetic strength reduction" is an algorithm to transform divisions into multiplications and shifts. 4 //! Compilers already perform this optimization for divisors that are known at compile time; this library enables this optimization for divisors that are only known at runtime. 5 //! 6 //! Benchmarking shows a 5-10x speedup or integer division and modulo operations. 7 //! 8 //! # Example: 9 //! ``` 10 //! use strength_reduce::StrengthReducedU64; 11 //! 12 //! let mut my_array: Vec<u64> = (0..500).collect(); 13 //! let divisor = 3; 14 //! let modulo = 14; 15 //! 16 //! // slow naive division and modulo 17 //! for element in &mut my_array { 18 //! *element = (*element / divisor) % modulo; 19 //! } 20 //! 21 //! // fast strength-reduced division and modulo 22 //! let reduced_divisor = StrengthReducedU64::new(divisor); 23 //! let reduced_modulo = StrengthReducedU64::new(modulo); 24 //! for element in &mut my_array { 25 //! *element = (*element / reduced_divisor) % reduced_modulo; 26 //! } 27 //! ``` 28 //! 29 //! This library is intended for hot loops like the example above, where a division is repeated many times in a loop with the divisor remaining unchanged. 30 //! There is a setup cost associated with creating stength-reduced division instances, so using strength-reduced division for 1-2 divisions is not worth the setup cost. 31 //! The break-even point differs by use-case, but is typically low: Benchmarking has shown that takes 3 to 4 repeated divisions with the same StengthReduced## instance to be worth it. 32 //! 33 //! `strength_reduce` is `#![no_std]` 34 //! 35 //! The optimizations that this library provides are inherently dependent on architecture, compiler, and platform, 36 //! so test before you use. 37 #![no_std] 38 39 #[cfg(test)] 40 extern crate num_bigint; 41 #[cfg(test)] 42 extern crate rand; 43 44 use core::ops::{Div, Rem}; 45 46 mod long_division; 47 mod long_multiplication; 48 49 /// Implements unsigned division and modulo via mutiplication and shifts. 50 /// 51 /// Creating a an instance of this struct is more expensive than a single division, but if the division is repeated, 52 /// this version will be several times faster than naive division. 53 #[derive(Clone, Copy, Debug)] 54 pub struct StrengthReducedU8 { 55 multiplier: u16, 56 divisor: u8, 57 } 58 impl StrengthReducedU8 { 59 /// Creates a new divisor instance. 60 /// 61 /// If possible, avoid calling new() from an inner loop: The intended usage is to create an instance of this struct outside the loop, and use it for divison and remainders inside the loop. 62 /// 63 /// # Panics: 64 /// 65 /// Panics if `divisor` is 0 66 #[inline] new(divisor: u8) -> Self67 pub fn new(divisor: u8) -> Self { 68 assert!(divisor > 0); 69 70 if divisor.is_power_of_two() { 71 Self{ multiplier: 0, divisor } 72 } else { 73 let divided = core::u16::MAX / (divisor as u16); 74 Self{ multiplier: divided + 1, divisor } 75 } 76 } 77 78 /// Simultaneous truncated integer division and modulus. 79 /// Returns `(quotient, remainder)`. 80 #[inline] div_rem(numerator: u8, denom: Self) -> (u8, u8)81 pub fn div_rem(numerator: u8, denom: Self) -> (u8, u8) { 82 let quotient = numerator / denom; 83 let remainder = numerator % denom; 84 (quotient, remainder) 85 } 86 87 /// Retrieve the value used to create this struct 88 #[inline] get(&self) -> u889 pub fn get(&self) -> u8 { 90 self.divisor 91 } 92 } 93 94 impl Div<StrengthReducedU8> for u8 { 95 type Output = u8; 96 97 #[inline] div(self, rhs: StrengthReducedU8) -> Self::Output98 fn div(self, rhs: StrengthReducedU8) -> Self::Output { 99 if rhs.multiplier == 0 { 100 (self as u16 >> rhs.divisor.trailing_zeros()) as u8 101 } else { 102 let numerator = self as u16; 103 let multiplied_hi = numerator * (rhs.multiplier >> 8); 104 let multiplied_lo = (numerator * rhs.multiplier as u8 as u16) >> 8; 105 106 ((multiplied_hi + multiplied_lo) >> 8) as u8 107 } 108 } 109 } 110 111 impl Rem<StrengthReducedU8> for u8 { 112 type Output = u8; 113 114 #[inline] rem(self, rhs: StrengthReducedU8) -> Self::Output115 fn rem(self, rhs: StrengthReducedU8) -> Self::Output { 116 if rhs.multiplier == 0 { 117 self & (rhs.divisor - 1) 118 } else { 119 let product = rhs.multiplier.wrapping_mul(self as u16) as u32; 120 let divisor = rhs.divisor as u32; 121 122 let shifted = (product * divisor) >> 16; 123 shifted as u8 124 } 125 } 126 } 127 128 // small types prefer to do work in the intermediate type 129 macro_rules! strength_reduced_u16 { 130 ($struct_name:ident, $primitive_type:ident) => ( 131 /// Implements unsigned division and modulo via mutiplication and shifts. 132 /// 133 /// Creating a an instance of this struct is more expensive than a single division, but if the division is repeated, 134 /// this version will be several times faster than naive division. 135 #[derive(Clone, Copy, Debug)] 136 pub struct $struct_name { 137 multiplier: u32, 138 divisor: $primitive_type, 139 } 140 impl $struct_name { 141 /// Creates a new divisor instance. 142 /// 143 /// If possible, avoid calling new() from an inner loop: The intended usage is to create an instance of this struct outside the loop, and use it for divison and remainders inside the loop. 144 /// 145 /// # Panics: 146 /// 147 /// Panics if `divisor` is 0 148 #[inline] 149 pub fn new(divisor: $primitive_type) -> Self { 150 assert!(divisor > 0); 151 152 if divisor.is_power_of_two() { 153 Self{ multiplier: 0, divisor } 154 } else { 155 let divided = core::u32::MAX / (divisor as u32); 156 Self{ multiplier: divided + 1, divisor } 157 } 158 } 159 160 /// Simultaneous truncated integer division and modulus. 161 /// Returns `(quotient, remainder)`. 162 #[inline] 163 pub fn div_rem(numerator: $primitive_type, denom: Self) -> ($primitive_type, $primitive_type) { 164 let quotient = numerator / denom; 165 let remainder = numerator - quotient * denom.divisor; 166 (quotient, remainder) 167 } 168 169 /// Retrieve the value used to create this struct 170 #[inline] 171 pub fn get(&self) -> $primitive_type { 172 self.divisor 173 } 174 } 175 176 impl Div<$struct_name> for $primitive_type { 177 type Output = $primitive_type; 178 179 #[inline] 180 fn div(self, rhs: $struct_name) -> Self::Output { 181 if rhs.multiplier == 0 { 182 self >> rhs.divisor.trailing_zeros() 183 } else { 184 let numerator = self as u32; 185 let multiplied_hi = numerator * (rhs.multiplier >> 16); 186 let multiplied_lo = (numerator * rhs.multiplier as u16 as u32) >> 16; 187 188 ((multiplied_hi + multiplied_lo) >> 16) as $primitive_type 189 } 190 } 191 } 192 193 impl Rem<$struct_name> for $primitive_type { 194 type Output = $primitive_type; 195 196 #[inline] 197 fn rem(self, rhs: $struct_name) -> Self::Output { 198 if rhs.multiplier == 0 { 199 self & (rhs.divisor - 1) 200 } else { 201 let quotient = self / rhs; 202 self - quotient * rhs.divisor 203 } 204 } 205 } 206 ) 207 } 208 209 // small types prefer to do work in the intermediate type 210 macro_rules! strength_reduced_u32 { 211 ($struct_name:ident, $primitive_type:ident) => ( 212 /// Implements unsigned division and modulo via mutiplication and shifts. 213 /// 214 /// Creating a an instance of this struct is more expensive than a single division, but if the division is repeated, 215 /// this version will be several times faster than naive division. 216 #[derive(Clone, Copy, Debug)] 217 pub struct $struct_name { 218 multiplier: u64, 219 divisor: $primitive_type, 220 } 221 impl $struct_name { 222 /// Creates a new divisor instance. 223 /// 224 /// If possible, avoid calling new() from an inner loop: The intended usage is to create an instance of this struct outside the loop, and use it for divison and remainders inside the loop. 225 /// 226 /// # Panics: 227 /// 228 /// Panics if `divisor` is 0 229 #[inline] 230 pub fn new(divisor: $primitive_type) -> Self { 231 assert!(divisor > 0); 232 233 if divisor.is_power_of_two() { 234 Self{ multiplier: 0, divisor } 235 } else { 236 let divided = core::u64::MAX / (divisor as u64); 237 Self{ multiplier: divided + 1, divisor } 238 } 239 } 240 241 /// Simultaneous truncated integer division and modulus. 242 /// Returns `(quotient, remainder)`. 243 #[inline] 244 pub fn div_rem(numerator: $primitive_type, denom: Self) -> ($primitive_type, $primitive_type) { 245 if denom.multiplier == 0 { 246 (numerator >> denom.divisor.trailing_zeros(), numerator & (denom.divisor - 1)) 247 } 248 else { 249 let numerator64 = numerator as u64; 250 let multiplied_hi = numerator64 * (denom.multiplier >> 32); 251 let multiplied_lo = numerator64 * (denom.multiplier as u32 as u64) >> 32; 252 253 let quotient = ((multiplied_hi + multiplied_lo) >> 32) as $primitive_type; 254 let remainder = numerator - quotient * denom.divisor; 255 (quotient, remainder) 256 } 257 } 258 259 /// Retrieve the value used to create this struct 260 #[inline] 261 pub fn get(&self) -> $primitive_type { 262 self.divisor 263 } 264 } 265 266 impl Div<$struct_name> for $primitive_type { 267 type Output = $primitive_type; 268 269 #[inline] 270 fn div(self, rhs: $struct_name) -> Self::Output { 271 if rhs.multiplier == 0 { 272 self >> rhs.divisor.trailing_zeros() 273 } else { 274 let numerator = self as u64; 275 let multiplied_hi = numerator * (rhs.multiplier >> 32); 276 let multiplied_lo = numerator * (rhs.multiplier as u32 as u64) >> 32; 277 278 ((multiplied_hi + multiplied_lo) >> 32) as $primitive_type 279 } 280 } 281 } 282 283 impl Rem<$struct_name> for $primitive_type { 284 type Output = $primitive_type; 285 286 #[inline] 287 fn rem(self, rhs: $struct_name) -> Self::Output { 288 if rhs.multiplier == 0 { 289 self & (rhs.divisor - 1) 290 } else { 291 let product = rhs.multiplier.wrapping_mul(self as u64) as u128; 292 let divisor = rhs.divisor as u128; 293 294 let shifted = (product * divisor) >> 64; 295 shifted as $primitive_type 296 } 297 } 298 } 299 ) 300 } 301 302 macro_rules! strength_reduced_u64 { 303 ($struct_name:ident, $primitive_type:ident) => ( 304 /// Implements unsigned division and modulo via mutiplication and shifts. 305 /// 306 /// Creating a an instance of this struct is more expensive than a single division, but if the division is repeated, 307 /// this version will be several times faster than naive division. 308 #[derive(Clone, Copy, Debug)] 309 pub struct $struct_name { 310 multiplier: u128, 311 divisor: $primitive_type, 312 } 313 impl $struct_name { 314 /// Creates a new divisor instance. 315 /// 316 /// If possible, avoid calling new() from an inner loop: The intended usage is to create an instance of this struct outside the loop, and use it for divison and remainders inside the loop. 317 /// 318 /// # Panics: 319 /// 320 /// Panics if `divisor` is 0 321 #[inline] 322 pub fn new(divisor: $primitive_type) -> Self { 323 assert!(divisor > 0); 324 325 if divisor.is_power_of_two() { 326 Self{ multiplier: 0, divisor } 327 } else { 328 let quotient = long_division::divide_128_max_by_64(divisor as u64); 329 Self{ multiplier: quotient + 1, divisor } 330 } 331 } 332 /// Simultaneous truncated integer division and modulus. 333 /// Returns `(quotient, remainder)`. 334 #[inline] 335 pub fn div_rem(numerator: $primitive_type, denom: Self) -> ($primitive_type, $primitive_type) { 336 if denom.multiplier == 0 { 337 (numerator >> denom.divisor.trailing_zeros(), numerator & (denom.divisor - 1)) 338 } 339 else { 340 let numerator128 = numerator as u128; 341 let multiplied_hi = numerator128 * (denom.multiplier >> 64); 342 let multiplied_lo = numerator128 * (denom.multiplier as u64 as u128) >> 64; 343 344 let quotient = ((multiplied_hi + multiplied_lo) >> 64) as $primitive_type; 345 let remainder = numerator - quotient * denom.divisor; 346 (quotient, remainder) 347 } 348 } 349 350 /// Retrieve the value used to create this struct 351 #[inline] 352 pub fn get(&self) -> $primitive_type { 353 self.divisor 354 } 355 } 356 357 impl Div<$struct_name> for $primitive_type { 358 type Output = $primitive_type; 359 360 #[inline] 361 fn div(self, rhs: $struct_name) -> Self::Output { 362 if rhs.multiplier == 0 { 363 self >> rhs.divisor.trailing_zeros() 364 } else { 365 let numerator = self as u128; 366 let multiplied_hi = numerator * (rhs.multiplier >> 64); 367 let multiplied_lo = numerator * (rhs.multiplier as u64 as u128) >> 64; 368 369 ((multiplied_hi + multiplied_lo) >> 64) as $primitive_type 370 } 371 } 372 } 373 374 impl Rem<$struct_name> for $primitive_type { 375 type Output = $primitive_type; 376 377 #[inline] 378 fn rem(self, rhs: $struct_name) -> Self::Output { 379 if rhs.multiplier == 0 { 380 self & (rhs.divisor - 1) 381 } else { 382 let quotient = self / rhs; 383 self - quotient * rhs.divisor 384 } 385 } 386 } 387 ) 388 } 389 390 /// Implements unsigned division and modulo via mutiplication and shifts. 391 /// 392 /// Creating a an instance of this struct is more expensive than a single division, but if the division is repeated, 393 /// this version will be several times faster than naive division. 394 #[derive(Clone, Copy, Debug)] 395 pub struct StrengthReducedU128 { 396 multiplier_hi: u128, 397 multiplier_lo: u128, 398 divisor: u128, 399 } 400 impl StrengthReducedU128 { 401 /// Creates a new divisor instance. 402 /// 403 /// If possible, avoid calling new() from an inner loop: The intended usage is to create an instance of this struct outside the loop, and use it for divison and remainders inside the loop. 404 /// 405 /// # Panics: 406 /// 407 /// Panics if `divisor` is 0 408 #[inline] new(divisor: u128) -> Self409 pub fn new(divisor: u128) -> Self { 410 assert!(divisor > 0); 411 412 if divisor.is_power_of_two() { 413 Self{ multiplier_hi: 0, multiplier_lo: 0, divisor } 414 } else { 415 let (quotient_hi, quotient_lo) = long_division::divide_256_max_by_128(divisor); 416 let multiplier_lo = quotient_lo.wrapping_add(1); 417 let multiplier_hi = if multiplier_lo == 0 { quotient_hi + 1 } else { quotient_hi }; 418 Self{ multiplier_hi, multiplier_lo, divisor } 419 } 420 } 421 422 /// Simultaneous truncated integer division and modulus. 423 /// Returns `(quotient, remainder)`. 424 #[inline] div_rem(numerator: u128, denom: Self) -> (u128, u128)425 pub fn div_rem(numerator: u128, denom: Self) -> (u128, u128) { 426 let quotient = numerator / denom; 427 let remainder = numerator - quotient * denom.divisor; 428 (quotient, remainder) 429 } 430 431 /// Retrieve the value used to create this struct 432 #[inline] get(&self) -> u128433 pub fn get(&self) -> u128 { 434 self.divisor 435 } 436 } 437 438 impl Div<StrengthReducedU128> for u128 { 439 type Output = u128; 440 441 #[inline] div(self, rhs: StrengthReducedU128) -> Self::Output442 fn div(self, rhs: StrengthReducedU128) -> Self::Output { 443 if rhs.multiplier_hi == 0 { 444 self >> rhs.divisor.trailing_zeros() 445 } else { 446 long_multiplication::multiply_256_by_128_upperbits(rhs.multiplier_hi, rhs.multiplier_lo, self) 447 } 448 } 449 } 450 451 impl Rem<StrengthReducedU128> for u128 { 452 type Output = u128; 453 454 #[inline] rem(self, rhs: StrengthReducedU128) -> Self::Output455 fn rem(self, rhs: StrengthReducedU128) -> Self::Output { 456 if rhs.multiplier_hi == 0 { 457 self & (rhs.divisor - 1) 458 } else { 459 let quotient = long_multiplication::multiply_256_by_128_upperbits(rhs.multiplier_hi, rhs.multiplier_lo, self); 460 self - quotient * rhs.divisor 461 } 462 } 463 } 464 465 // We just hardcoded u8 and u128 since they will never be a usize. for the rest, we have macros, so we can reuse the same code for usize 466 strength_reduced_u16!(StrengthReducedU16, u16); 467 strength_reduced_u32!(StrengthReducedU32, u32); 468 strength_reduced_u64!(StrengthReducedU64, u64); 469 470 // Our definition for usize will depend on how big usize is 471 #[cfg(target_pointer_width = "16")] 472 strength_reduced_u16!(StrengthReducedUsize, usize); 473 #[cfg(target_pointer_width = "32")] 474 strength_reduced_u32!(StrengthReducedUsize, usize); 475 #[cfg(target_pointer_width = "64")] 476 strength_reduced_u64!(StrengthReducedUsize, usize); 477 478 #[cfg(test)] 479 mod unit_tests { 480 use super::*; 481 482 macro_rules! reduction_test { 483 ($test_name:ident, $struct_name:ident, $primitive_type:ident) => ( 484 #[test] 485 fn $test_name() { 486 let max = core::$primitive_type::MAX; 487 let divisors = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,max-1,max]; 488 let numerators = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]; 489 490 for &divisor in &divisors { 491 let reduced_divisor = $struct_name::new(divisor); 492 for &numerator in &numerators { 493 let expected_div = numerator / divisor; 494 let expected_rem = numerator % divisor; 495 496 let reduced_div = numerator / reduced_divisor; 497 498 assert_eq!(expected_div, reduced_div, "Divide failed with numerator: {}, divisor: {}", numerator, divisor); 499 let reduced_rem = numerator % reduced_divisor; 500 501 let (reduced_combined_div, reduced_combined_rem) = $struct_name::div_rem(numerator, reduced_divisor); 502 503 504 assert_eq!(expected_rem, reduced_rem, "Modulo failed with numerator: {}, divisor: {}", numerator, divisor); 505 assert_eq!(expected_div, reduced_combined_div, "div_rem divide failed with numerator: {}, divisor: {}", numerator, divisor); 506 assert_eq!(expected_rem, reduced_combined_rem, "div_rem modulo failed with numerator: {}, divisor: {}", numerator, divisor); 507 } 508 } 509 } 510 ) 511 } 512 513 reduction_test!(test_strength_reduced_u8, StrengthReducedU8, u8); 514 reduction_test!(test_strength_reduced_u16, StrengthReducedU16, u16); 515 reduction_test!(test_strength_reduced_u32, StrengthReducedU32, u32); 516 reduction_test!(test_strength_reduced_u64, StrengthReducedU64, u64); 517 reduction_test!(test_strength_reduced_usize, StrengthReducedUsize, usize); 518 reduction_test!(test_strength_reduced_u128, StrengthReducedU128, u128); 519 } 520