1 extern crate byteorder; 2 extern crate digest; 3 extern crate murmurhash3; 4 extern crate sha2; 5 6 use byteorder::ReadBytesExt; 7 use murmurhash3::murmurhash3_x86_32; 8 use sha2::{Digest, Sha256}; 9 use std::convert::{TryFrom, TryInto}; 10 use std::fmt; 11 use std::io::{Error, ErrorKind}; 12 13 /// Helper struct to provide read-only bit access to a slice of bytes. 14 struct BitSlice<'a> { 15 /// The slice of bytes we're interested in. 16 bytes: &'a [u8], 17 /// The number of bits that are valid to access in the slice. 18 /// Not necessarily equal to `bytes.len() * 8`, but it will not be greater than that. 19 bit_len: usize, 20 } 21 22 impl<'a> BitSlice<'a> { 23 /// Creates a new `BitSlice` of the given bit length over the given slice of data. 24 /// Panics if the indicated bit length is larger than fits in the slice. 25 /// 26 /// # Arguments 27 /// * `bytes` - The slice of bytes we need bit-access to 28 /// * `bit_len` - The number of bits that are valid to access in the slice new(bytes: &'a [u8], bit_len: usize) -> BitSlice<'a>29 fn new(bytes: &'a [u8], bit_len: usize) -> BitSlice<'a> { 30 if bit_len > bytes.len() * 8 { 31 panic!( 32 "bit_len too large for given data: {} > {} * 8", 33 bit_len, 34 bytes.len() 35 ); 36 } 37 BitSlice { bytes, bit_len } 38 } 39 40 /// Get the value of the specified bit. 41 /// Panics if the specified bit is out of range for the number of bits in this instance. 42 /// 43 /// # Arguments 44 /// * `bit_index` - The bit index to access get(&self, bit_index: usize) -> bool45 fn get(&self, bit_index: usize) -> bool { 46 if bit_index >= self.bit_len { 47 panic!( 48 "bit index out of range for bit slice: {} >= {}", 49 bit_index, self.bit_len 50 ); 51 } 52 let byte_index = bit_index / 8; 53 let final_bit_index = bit_index % 8; 54 let byte = self.bytes[byte_index]; 55 let test_value = match final_bit_index { 56 0 => byte & 0b0000_0001u8, 57 1 => byte & 0b0000_0010u8, 58 2 => byte & 0b0000_0100u8, 59 3 => byte & 0b0000_1000u8, 60 4 => byte & 0b0001_0000u8, 61 5 => byte & 0b0010_0000u8, 62 6 => byte & 0b0100_0000u8, 63 7 => byte & 0b1000_0000u8, 64 _ => panic!("impossible final_bit_index value: {}", final_bit_index), 65 }; 66 test_value > 0 67 } 68 } 69 70 /// A Bloom filter representing a specific level in a multi-level cascading Bloom filter. 71 struct Bloom<'a> { 72 /// What level this filter is in 73 level: u8, 74 /// How many hash functions this filter uses 75 n_hash_funcs: u32, 76 /// The bit length of the filter 77 size: u32, 78 /// The data of the filter 79 bit_slice: BitSlice<'a>, 80 /// The hash algorithm enumeration in use 81 hash_algorithm: HashAlgorithm, 82 } 83 84 #[repr(u8)] 85 #[derive(Copy, Clone)] 86 /// These enumerations need to match the python filter-cascade project: 87 /// https://github.com/mozilla/filter-cascade/blob/v0.3.0/filtercascade/fileformats.py 88 enum HashAlgorithm { 89 MurmurHash3 = 1, 90 Sha256 = 2, 91 } 92 93 impl fmt::Display for HashAlgorithm { fmt(&self, f: &mut fmt::Formatter) -> fmt::Result94 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 95 write!(f, "{}", *self as u8) 96 } 97 } 98 99 impl TryFrom<u8> for HashAlgorithm { 100 type Error = (); try_from(value: u8) -> Result<HashAlgorithm, ()>101 fn try_from(value: u8) -> Result<HashAlgorithm, ()> { 102 match value { 103 // Naturally, these need to match the enum declaration 104 1 => Ok(Self::MurmurHash3), 105 2 => Ok(Self::Sha256), 106 _ => Err(()), 107 } 108 } 109 } 110 111 impl<'a> Bloom<'a> { 112 /// Attempts to decode and return a pair that consists of the Bloom filter represented by the 113 /// given bytes and any remaining unprocessed bytes in the given bytes. 114 /// 115 /// # Arguments 116 /// * `bytes` - The encoded representation of this Bloom filter. May include additional data 117 /// describing further Bloom filters. Any additional data is returned unconsumed. 118 /// The format of an encoded Bloom filter is: 119 /// [1 byte] - the hash algorithm to use in the filter 120 /// [4 little endian bytes] - the length in bits of the filter 121 /// [4 little endian bytes] - the number of hash functions to use in the filter 122 /// [1 byte] - which level in the cascade this filter is 123 /// [variable length bytes] - the filter itself (the length is determined by Ceiling(bit length 124 /// / 8) from_bytes(bytes: &'a [u8]) -> Result<(Bloom<'a>, &'a [u8]), Error>125 pub fn from_bytes(bytes: &'a [u8]) -> Result<(Bloom<'a>, &'a [u8]), Error> { 126 let mut cursor = bytes; 127 // Load the layer metadata. bloomer.py writes size, nHashFuncs and level as little-endian 128 // unsigned ints. 129 let hash_algorithm_val = cursor.read_u8()?; 130 let hash_algorithm = match HashAlgorithm::try_from(hash_algorithm_val) { 131 Ok(algo) => algo, 132 Err(()) => { 133 return Err(Error::new( 134 ErrorKind::InvalidData, 135 "Unexpected hash algorithm", 136 )) 137 } 138 }; 139 140 let size = cursor.read_u32::<byteorder::LittleEndian>()?; 141 let n_hash_funcs = cursor.read_u32::<byteorder::LittleEndian>()?; 142 let level = cursor.read_u8()?; 143 144 let shifted_size = size.wrapping_shr(3) as usize; 145 let byte_count = if size % 8 != 0 { 146 shifted_size + 1 147 } else { 148 shifted_size 149 }; 150 if byte_count > cursor.len() { 151 return Err(Error::new( 152 ErrorKind::InvalidData, 153 "Invalid Bloom filter: too short", 154 )); 155 } 156 let (bits_bytes, rest_of_bytes) = cursor.split_at(byte_count); 157 let bloom = Bloom { 158 level, 159 n_hash_funcs, 160 size, 161 bit_slice: BitSlice::new(bits_bytes, size as usize), 162 hash_algorithm, 163 }; 164 Ok((bloom, rest_of_bytes)) 165 } 166 hash(&self, n_fn: u32, key: &[u8], salt: Option<&[u8]>) -> u32167 fn hash(&self, n_fn: u32, key: &[u8], salt: Option<&[u8]>) -> u32 { 168 match self.hash_algorithm { 169 HashAlgorithm::MurmurHash3 => { 170 if salt.is_some() { 171 panic!("murmur does not support salts") 172 } 173 let hash_seed = (n_fn << 16) + self.level as u32; 174 murmurhash3_x86_32(key, hash_seed) % self.size 175 } 176 HashAlgorithm::Sha256 => { 177 let mut hasher = Sha256::new(); 178 if let Some(salt_bytes) = salt { 179 hasher.input(salt_bytes) 180 } 181 hasher.input(n_fn.to_le_bytes()); 182 hasher.input(self.level.to_le_bytes()); 183 hasher.input(key); 184 185 u32::from_le_bytes( 186 hasher.result()[0..4] 187 .try_into() 188 .expect("sha256 should have given enough bytes"), 189 ) % self.size 190 } 191 } 192 } 193 194 /// Test for the presence of a given sequence of bytes in this Bloom filter. 195 /// 196 /// # Arguments 197 /// `item` - The slice of bytes to test for has(&self, item: &[u8], salt: Option<&[u8]>) -> bool198 pub fn has(&self, item: &[u8], salt: Option<&[u8]>) -> bool { 199 for i in 0..self.n_hash_funcs { 200 if !self.bit_slice.get(self.hash(i, item, salt) as usize) { 201 return false; 202 } 203 } 204 true 205 } 206 } 207 208 impl<'a> fmt::Display for Bloom<'a> { fmt(&self, f: &mut fmt::Formatter) -> fmt::Result209 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 210 write!( 211 f, 212 "level={} n_hash_funcs={} hash_algorithm={} size={}", 213 self.level, self.n_hash_funcs, self.hash_algorithm, self.size 214 ) 215 } 216 } 217 218 /// A multi-level cascading Bloom filter. 219 pub struct Cascade<'a> { 220 /// The Bloom filter for this level in the cascade 221 filter: Bloom<'a>, 222 /// The next (lower) level in the cascade 223 child_layer: Option<Box<Cascade<'a>>>, 224 /// The salt in use, if any 225 salt: Option<&'a [u8]>, 226 /// Whether the logic should be inverted 227 inverted: bool, 228 } 229 230 impl<'a> Cascade<'a> { 231 /// Attempts to decode and return a multi-level cascading Bloom filter. NB: `Cascade` does not 232 /// take ownership of the given data. This is to facilitate decoding cascading filters 233 /// backed by memory-mapped files. 234 /// 235 /// # Arguments 236 /// `bytes` - The encoded representation of the Bloom filters in this cascade. Starts with 2 237 /// little endian bytes indicating the version. The current version is 2. The Python 238 /// filter-cascade project defines the formats, see 239 /// https://github.com/mozilla/filter-cascade/blob/v0.3.0/filtercascade/fileformats.py 240 /// 241 /// May be of length 0, in which case `None` is returned. from_bytes(bytes: &'a [u8]) -> Result<Option<Box<Cascade<'a>>>, Error>242 pub fn from_bytes(bytes: &'a [u8]) -> Result<Option<Box<Cascade<'a>>>, Error> { 243 if bytes.is_empty() { 244 return Ok(None); 245 } 246 let mut cursor = bytes; 247 let version = cursor.read_u16::<byteorder::LittleEndian>()?; 248 let mut salt = None; 249 let mut inverted = false; 250 251 if version >= 2 { 252 inverted = cursor.read_u8()? != 0; 253 let salt_len = cursor.read_u8()? as usize; 254 255 if salt_len > cursor.len() { 256 return Err(Error::new( 257 ErrorKind::InvalidData, 258 "Invalid Bloom filter: too short", 259 )); 260 } 261 262 let (salt_bytes, remaining_bytes) = cursor.split_at(salt_len); 263 if salt_len > 0 { 264 salt = Some(salt_bytes) 265 } 266 cursor = remaining_bytes; 267 } 268 269 if version > 2 { 270 return Err(Error::new( 271 ErrorKind::InvalidData, 272 format!("Invalid version: {}", version), 273 )); 274 } 275 276 Cascade::child_layer_from_bytes(cursor, salt, inverted) 277 } 278 child_layer_from_bytes( bytes: &'a [u8], salt: Option<&'a [u8]>, inverted: bool, ) -> Result<Option<Box<Cascade<'a>>>, Error>279 fn child_layer_from_bytes( 280 bytes: &'a [u8], 281 salt: Option<&'a [u8]>, 282 inverted: bool, 283 ) -> Result<Option<Box<Cascade<'a>>>, Error> { 284 if bytes.is_empty() { 285 return Ok(None); 286 } 287 let (filter, rest_of_bytes) = Bloom::from_bytes(bytes)?; 288 Ok(Some(Box::new(Cascade { 289 filter, 290 child_layer: Cascade::child_layer_from_bytes(rest_of_bytes, salt, inverted)?, 291 salt, 292 inverted, 293 }))) 294 } 295 296 /// Determine if the given sequence of bytes is in the cascade. 297 /// 298 /// # Arguments 299 /// `entry` - The slice of bytes to test for has(&self, entry: &[u8]) -> bool300 pub fn has(&self, entry: &[u8]) -> bool { 301 let result = self.has_internal(entry); 302 if self.inverted { 303 return !result; 304 } 305 result 306 } 307 has_internal(&self, entry: &[u8]) -> bool308 pub fn has_internal(&self, entry: &[u8]) -> bool { 309 if self.filter.has(&entry, self.salt) { 310 match self.child_layer { 311 Some(ref child) => { 312 let child_value = !child.has_internal(entry); 313 return child_value; 314 } 315 None => { 316 return true; 317 } 318 } 319 } 320 false 321 } 322 } 323 324 impl<'a> fmt::Display for Cascade<'a> { fmt(&self, f: &mut fmt::Formatter) -> fmt::Result325 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 326 write!( 327 f, 328 "salt={:?} inverted={} filter=[{}] ", 329 self.salt, self.inverted, self.filter 330 )?; 331 match &self.child_layer { 332 Some(layer) => write!(f, "[child={}]", layer), 333 None => Ok(()), 334 } 335 } 336 } 337 338 #[cfg(test)] 339 mod tests { 340 use Bloom; 341 use Cascade; 342 343 #[test] bloom_v1_test_from_bytes()344 fn bloom_v1_test_from_bytes() { 345 let src: Vec<u8> = vec![ 346 0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41, 0x00, 347 ]; 348 349 match Bloom::from_bytes(&src) { 350 Ok((bloom, rest_of_bytes)) => { 351 assert!(rest_of_bytes.len() == 0); 352 assert!(bloom.has(b"this", None) == true); 353 assert!(bloom.has(b"that", None) == true); 354 assert!(bloom.has(b"other", None) == false); 355 } 356 Err(_) => { 357 panic!("Parsing failed"); 358 } 359 }; 360 361 let short: Vec<u8> = vec![ 362 0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41, 363 ]; 364 assert!(Bloom::from_bytes(&short).is_err()); 365 } 366 367 #[test] bloom_v3_unsupported()368 fn bloom_v3_unsupported() { 369 let src: Vec<u8> = vec![0x03, 0x01, 0x00]; 370 assert!(Bloom::from_bytes(&src).is_err()); 371 } 372 373 #[test] cascade_v1_murmur_from_file_bytes_test()374 fn cascade_v1_murmur_from_file_bytes_test() { 375 let v = include_bytes!("../test_data/test_v1_murmur_mlbf"); 376 let cascade = Cascade::from_bytes(v) 377 .expect("parsing Cascade should succeed") 378 .expect("Cascade should be Some"); 379 // Key format is SHA256(issuer SPKI) + serial number 380 #[rustfmt::skip] 381 let key_for_revoked_cert_1 = 382 [ 0x2e, 0xb2, 0xd5, 0xa8, 0x60, 0xfe, 0x50, 0xe9, 0xc2, 0x42, 0x36, 0x85, 0x52, 0x98, 383 0x01, 0x50, 0xe4, 0x5d, 0xb5, 0x32, 0x1a, 0x5b, 0x00, 0x5e, 0x26, 0xd6, 0x76, 0x25, 384 0x3a, 0x40, 0x9b, 0xf5, 385 0x06, 0x2d, 0xf5, 0x68, 0xa0, 0x51, 0x31, 0x08, 0x20, 0xd7, 0xec, 0x43, 0x27, 0xe1, 386 0xba, 0xfd ]; 387 assert!(cascade.has(&key_for_revoked_cert_1)); 388 #[rustfmt::skip] 389 let key_for_revoked_cert_2 = 390 [ 0xf1, 0x1c, 0x3d, 0xd0, 0x48, 0xf7, 0x4e, 0xdb, 0x7c, 0x45, 0x19, 0x2b, 0x83, 0xe5, 391 0x98, 0x0d, 0x2f, 0x67, 0xec, 0x84, 0xb4, 0xdd, 0xb9, 0x39, 0x6e, 0x33, 0xff, 0x51, 392 0x73, 0xed, 0x69, 0x8f, 393 0x00, 0xd2, 0xe8, 0xf6, 0xaa, 0x80, 0x48, 0x1c, 0xd4 ]; 394 assert!(cascade.has(&key_for_revoked_cert_2)); 395 #[rustfmt::skip] 396 let key_for_valid_cert = 397 [ 0x99, 0xfc, 0x9d, 0x40, 0xf1, 0xad, 0xb1, 0x63, 0x65, 0x61, 0xa6, 0x1d, 0x68, 0x3d, 398 0x9e, 0xa6, 0xb4, 0x60, 0xc5, 0x7d, 0x0c, 0x75, 0xea, 0x00, 0xc3, 0x41, 0xb9, 0xdf, 399 0xb9, 0x0b, 0x5f, 0x39, 400 0x0b, 0x77, 0x75, 0xf7, 0xaf, 0x9a, 0xe5, 0x42, 0x65, 0xc9, 0xcd, 0x32, 0x57, 0x10, 401 0x77, 0x8e ]; 402 assert!(!cascade.has(&key_for_valid_cert)); 403 404 let v = include_bytes!("../test_data/test_v1_murmur_short_mlbf"); 405 assert!(Cascade::from_bytes(v).is_err()); 406 } 407 408 #[test] cascade_v2_sha256_from_file_bytes_test()409 fn cascade_v2_sha256_from_file_bytes_test() { 410 let v = include_bytes!("../test_data/test_v2_sha256_mlbf"); 411 let cascade = Cascade::from_bytes(v) 412 .expect("parsing Cascade should succeed") 413 .expect("Cascade should be Some"); 414 415 assert!(cascade.salt == None); 416 assert!(cascade.inverted == false); 417 assert!(cascade.has(b"this") == true); 418 assert!(cascade.has(b"that") == true); 419 assert!(cascade.has(b"other") == false); 420 } 421 422 #[test] cascade_v2_sha256_with_salt_from_file_bytes_test()423 fn cascade_v2_sha256_with_salt_from_file_bytes_test() { 424 let v = include_bytes!("../test_data/test_v2_sha256_salt_mlbf"); 425 let cascade = Cascade::from_bytes(v) 426 .expect("parsing Cascade should succeed") 427 .expect("Cascade should be Some"); 428 429 assert!(cascade.salt == Some(b"nacl")); 430 assert!(cascade.inverted == false); 431 assert!(cascade.has(b"this") == true); 432 assert!(cascade.has(b"that") == true); 433 assert!(cascade.has(b"other") == false); 434 } 435 436 #[test] cascade_v2_murmur_from_file_bytes_test()437 fn cascade_v2_murmur_from_file_bytes_test() { 438 let v = include_bytes!("../test_data/test_v2_murmur_mlbf"); 439 let cascade = Cascade::from_bytes(v) 440 .expect("parsing Cascade should succeed") 441 .expect("Cascade should be Some"); 442 443 assert!(cascade.salt == None); 444 assert!(cascade.inverted == false); 445 assert!(cascade.has(b"this") == true); 446 assert!(cascade.has(b"that") == true); 447 assert!(cascade.has(b"other") == false); 448 } 449 450 #[test] cascade_v2_murmur_inverted_from_file_bytes_test()451 fn cascade_v2_murmur_inverted_from_file_bytes_test() { 452 let v = include_bytes!("../test_data/test_v2_murmur_inverted_mlbf"); 453 let cascade = Cascade::from_bytes(v) 454 .expect("parsing Cascade should succeed") 455 .expect("Cascade should be Some"); 456 457 assert!(cascade.salt == None); 458 assert!(cascade.inverted == true); 459 assert!(cascade.has(b"this") == true); 460 assert!(cascade.has(b"that") == true); 461 assert!(cascade.has(b"other") == false); 462 } 463 464 #[test] cascade_v2_sha256_inverted_from_file_bytes_test()465 fn cascade_v2_sha256_inverted_from_file_bytes_test() { 466 let v = include_bytes!("../test_data/test_v2_sha256_inverted_mlbf"); 467 let cascade = Cascade::from_bytes(v) 468 .expect("parsing Cascade should succeed") 469 .expect("Cascade should be Some"); 470 471 assert!(cascade.salt == None); 472 assert!(cascade.inverted == true); 473 assert!(cascade.has(b"this") == true); 474 assert!(cascade.has(b"that") == true); 475 assert!(cascade.has(b"other") == false); 476 } 477 } 478