1 // This code is unused. PCMPESTRI is gratuitously slow. I imagine it might 2 // start winning with a hypothetical memchr4 (or greater). This technique might 3 // also be good for exposing searches over ranges of bytes, but that departs 4 // from the standard memchr API, so it's not clear whether we actually want 5 // that or not. 6 // 7 // N.B. PCMPISTRI appears to be about twice as fast as PCMPESTRI, which is kind 8 // of neat. Unfortunately, UTF-8 strings can contain NUL bytes, which means 9 // I don't see a way of effectively using PCMPISTRI unless there's some fast 10 // way to replace zero bytes with a byte that is not not a needle byte. 11 12 use core::arch::x86_64::*; 13 use core::mem::size_of; 14 15 use x86::sse2; 16 17 const VECTOR_SIZE: usize = size_of::<__m128i>(); 18 const CONTROL_ANY: i32 = 19 _SIDD_UBYTE_OPS 20 | _SIDD_CMP_EQUAL_ANY 21 | _SIDD_POSITIVE_POLARITY 22 | _SIDD_LEAST_SIGNIFICANT; 23 24 #[target_feature(enable = "sse4.2")] 25 pub unsafe fn memchr3( 26 n1: u8, n2: u8, n3: u8, 27 haystack: &[u8] 28 ) -> Option<usize> { 29 let vn1 = _mm_set1_epi8(n1 as i8); 30 let vn2 = _mm_set1_epi8(n2 as i8); 31 let vn3 = _mm_set1_epi8(n3 as i8); 32 let vn = _mm_setr_epi8( 33 n1 as i8, n2 as i8, n3 as i8, 0, 34 0, 0, 0, 0, 35 0, 0, 0, 0, 36 0, 0, 0, 0, 37 ); 38 let len = haystack.len(); 39 let start_ptr = haystack.as_ptr(); 40 let end_ptr = haystack[haystack.len()..].as_ptr(); 41 let mut ptr = start_ptr; 42 43 if haystack.len() < VECTOR_SIZE { 44 while ptr < end_ptr { 45 if *ptr == n1 || *ptr == n2 || *ptr == n3 { 46 return Some(sub(ptr, start_ptr)); 47 } 48 ptr = ptr.offset(1); 49 } 50 return None; 51 } 52 while ptr <= end_ptr.sub(VECTOR_SIZE) { 53 let chunk = _mm_loadu_si128(ptr as *const __m128i); 54 let res = _mm_cmpestri(vn, 3, chunk, 16, CONTROL_ANY); 55 if res < 16 { 56 return Some(sub(ptr, start_ptr) + res as usize); 57 } 58 ptr = ptr.add(VECTOR_SIZE); 59 } 60 if ptr < end_ptr { 61 debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); 62 ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); 63 debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); 64 65 return sse2::forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3); 66 } 67 None 68 } 69 70 /// Subtract `b` from `a` and return the difference. `a` should be greater than 71 /// or equal to `b`. 72 fn sub(a: *const u8, b: *const u8) -> usize { 73 debug_assert!(a >= b); 74 (a as usize) - (b as usize) 75 } 76