1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_FIND_IF_H
10 #define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_FIND_IF_H
11 
12 #include <__algorithm/find_if.h>
13 #include <__algorithm/pstl_backends/cpu_backends/backend.h>
14 #include <__atomic/atomic.h>
15 #include <__config>
16 #include <__functional/operations.h>
17 #include <__iterator/iterator_traits.h>
18 #include <__type_traits/is_execution_policy.h>
19 #include <__utility/pair.h>
20 #include <__utility/terminate_on_exception.h>
21 #include <cstddef>
22 
23 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
24 #  pragma GCC system_header
25 #endif
26 
27 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
28 
29 _LIBCPP_BEGIN_NAMESPACE_STD
30 
31 template <class _Index, class _Brick, class _Compare>
32 _LIBCPP_HIDE_FROM_ABI _Index
33 __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool __b_first) {
34   typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType;
35   const _DifferenceType __n      = __last - __first;
36   _DifferenceType __initial_dist = __b_first ? __n : -1;
37   std::atomic<_DifferenceType> __extremum(__initial_dist);
38   // TODO: find out what is better here: parallel_for or parallel_reduce
39   __par_backend::__parallel_for(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
40     // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of
41     // why using a shared variable scales fairly well in this situation.
42     if (__comp(__i - __first, __extremum)) {
43       _Index __res = __f(__i, __j);
44       // If not '__last' returned then we found what we want so put this to extremum
45       if (__res != __j) {
46         const _DifferenceType __k = __res - __first;
47         for (_DifferenceType __old = __extremum; __comp(__k, __old); __old = __extremum) {
48           __extremum.compare_exchange_weak(__old, __k);
49         }
50       }
51     }
52   });
53   return __extremum != __initial_dist ? __first + __extremum : __last;
54 }
55 
56 template <class _Index, class _DifferenceType, class _Compare>
57 _LIBCPP_HIDE_FROM_ABI _Index
58 __simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept {
59   // Experiments show good block sizes like this
60   const _DifferenceType __block_size                        = 8;
61   alignas(__lane_size) _DifferenceType __lane[__block_size] = {0};
62   while (__end - __begin >= __block_size) {
63     _DifferenceType __found = 0;
64     _PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) {
65       const _DifferenceType __t = __comp(__first, __i);
66       __lane[__i - __begin]     = __t;
67       __found |= __t;
68     }
69     if (__found) {
70       _DifferenceType __i;
71       // This will vectorize
72       for (__i = 0; __i < __block_size; ++__i) {
73         if (__lane[__i]) {
74           break;
75         }
76       }
77       return __first + __begin + __i;
78     }
79     __begin += __block_size;
80   }
81 
82   // Keep remainder scalar
83   while (__begin != __end) {
84     if (__comp(__first, __begin)) {
85       return __first + __begin;
86     }
87     ++__begin;
88   }
89   return __first + __end;
90 }
91 
92 template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
93 _LIBCPP_HIDE_FROM_ABI _ForwardIterator
94 __pstl_find_if(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) {
95   if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
96                 __has_random_access_iterator_category<_ForwardIterator>::value) {
97     return std::__terminate_on_exception([&] {
98       return std::__parallel_find(
99           __first,
100           __last,
101           [&__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
102             return std::__pstl_find_if<__remove_parallel_policy_t<_ExecutionPolicy>>(
103                 __cpu_backend_tag{}, __brick_first, __brick_last, __pred);
104           },
105           less<>{},
106           true);
107     });
108   } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
109                        __has_random_access_iterator_category<_ForwardIterator>::value) {
110     using __diff_t = __iter_diff_t<_ForwardIterator>;
111     return std::__simd_first(__first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) {
112       return __pred(__iter[__i]);
113     });
114   } else {
115     return std::find_if(__first, __last, __pred);
116   }
117 }
118 
119 _LIBCPP_END_NAMESPACE_STD
120 
121 #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
122 
123 #endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_FIND_IF_H
124