1 //===----------------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_FIND_IF_H 10 #define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_FIND_IF_H 11 12 #include <__algorithm/find_if.h> 13 #include <__algorithm/pstl_backends/cpu_backends/backend.h> 14 #include <__atomic/atomic.h> 15 #include <__config> 16 #include <__functional/operations.h> 17 #include <__iterator/iterator_traits.h> 18 #include <__type_traits/is_execution_policy.h> 19 #include <__utility/pair.h> 20 #include <__utility/terminate_on_exception.h> 21 #include <cstddef> 22 23 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 24 # pragma GCC system_header 25 #endif 26 27 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 28 29 _LIBCPP_BEGIN_NAMESPACE_STD 30 31 template <class _Index, class _Brick, class _Compare> 32 _LIBCPP_HIDE_FROM_ABI _Index 33 __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool __b_first) { 34 typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType; 35 const _DifferenceType __n = __last - __first; 36 _DifferenceType __initial_dist = __b_first ? __n : -1; 37 std::atomic<_DifferenceType> __extremum(__initial_dist); 38 // TODO: find out what is better here: parallel_for or parallel_reduce 39 __par_backend::__parallel_for(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) { 40 // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of 41 // why using a shared variable scales fairly well in this situation. 42 if (__comp(__i - __first, __extremum)) { 43 _Index __res = __f(__i, __j); 44 // If not '__last' returned then we found what we want so put this to extremum 45 if (__res != __j) { 46 const _DifferenceType __k = __res - __first; 47 for (_DifferenceType __old = __extremum; __comp(__k, __old); __old = __extremum) { 48 __extremum.compare_exchange_weak(__old, __k); 49 } 50 } 51 } 52 }); 53 return __extremum != __initial_dist ? __first + __extremum : __last; 54 } 55 56 template <class _Index, class _DifferenceType, class _Compare> 57 _LIBCPP_HIDE_FROM_ABI _Index 58 __simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept { 59 // Experiments show good block sizes like this 60 const _DifferenceType __block_size = 8; 61 alignas(__lane_size) _DifferenceType __lane[__block_size] = {0}; 62 while (__end - __begin >= __block_size) { 63 _DifferenceType __found = 0; 64 _PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) { 65 const _DifferenceType __t = __comp(__first, __i); 66 __lane[__i - __begin] = __t; 67 __found |= __t; 68 } 69 if (__found) { 70 _DifferenceType __i; 71 // This will vectorize 72 for (__i = 0; __i < __block_size; ++__i) { 73 if (__lane[__i]) { 74 break; 75 } 76 } 77 return __first + __begin + __i; 78 } 79 __begin += __block_size; 80 } 81 82 // Keep remainder scalar 83 while (__begin != __end) { 84 if (__comp(__first, __begin)) { 85 return __first + __begin; 86 } 87 ++__begin; 88 } 89 return __first + __end; 90 } 91 92 template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate> 93 _LIBCPP_HIDE_FROM_ABI _ForwardIterator 94 __pstl_find_if(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { 95 if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && 96 __has_random_access_iterator_category<_ForwardIterator>::value) { 97 return std::__terminate_on_exception([&] { 98 return std::__parallel_find( 99 __first, 100 __last, 101 [&__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { 102 return std::__pstl_find_if<__remove_parallel_policy_t<_ExecutionPolicy>>( 103 __cpu_backend_tag{}, __brick_first, __brick_last, __pred); 104 }, 105 less<>{}, 106 true); 107 }); 108 } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && 109 __has_random_access_iterator_category<_ForwardIterator>::value) { 110 using __diff_t = __iter_diff_t<_ForwardIterator>; 111 return std::__simd_first(__first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) { 112 return __pred(__iter[__i]); 113 }); 114 } else { 115 return std::find_if(__first, __last, __pred); 116 } 117 } 118 119 _LIBCPP_END_NAMESPACE_STD 120 121 #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 122 123 #endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_FIND_IF_H 124