1*38fd1498Szrj // -*- C++ -*- 2*38fd1498Szrj 3*38fd1498Szrj // Copyright (C) 2007-2018 Free Software Foundation, Inc. 4*38fd1498Szrj // 5*38fd1498Szrj // This file is part of the GNU ISO C++ Library. This library is free 6*38fd1498Szrj // software; you can redistribute it and/or modify it under the terms 7*38fd1498Szrj // of the GNU General Public License as published by the Free Software 8*38fd1498Szrj // Foundation; either version 3, or (at your option) any later 9*38fd1498Szrj // version. 10*38fd1498Szrj 11*38fd1498Szrj // This library is distributed in the hope that it will be useful, but 12*38fd1498Szrj // WITHOUT ANY WARRANTY; without even the implied warranty of 13*38fd1498Szrj // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14*38fd1498Szrj // General Public License for more details. 15*38fd1498Szrj 16*38fd1498Szrj // Under Section 7 of GPL version 3, you are granted additional 17*38fd1498Szrj // permissions described in the GCC Runtime Library Exception, version 18*38fd1498Szrj // 3.1, as published by the Free Software Foundation. 19*38fd1498Szrj 20*38fd1498Szrj // You should have received a copy of the GNU General Public License and 21*38fd1498Szrj // a copy of the GCC Runtime Library Exception along with this program; 22*38fd1498Szrj // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23*38fd1498Szrj // <http://www.gnu.org/licenses/>. 24*38fd1498Szrj 25*38fd1498Szrj /** @file parallel/find.h 26*38fd1498Szrj * @brief Parallel implementation base for std::find(), std::equal() 27*38fd1498Szrj * and related functions. 28*38fd1498Szrj * This file is a GNU parallel extension to the Standard C++ Library. 29*38fd1498Szrj */ 30*38fd1498Szrj 31*38fd1498Szrj // Written by Felix Putze and Johannes Singler. 32*38fd1498Szrj 33*38fd1498Szrj #ifndef _GLIBCXX_PARALLEL_FIND_H 34*38fd1498Szrj #define _GLIBCXX_PARALLEL_FIND_H 1 35*38fd1498Szrj 36*38fd1498Szrj #include <bits/stl_algobase.h> 37*38fd1498Szrj 38*38fd1498Szrj #include <parallel/features.h> 39*38fd1498Szrj #include <parallel/parallel.h> 40*38fd1498Szrj #include <parallel/compatibility.h> 41*38fd1498Szrj #include <parallel/equally_split.h> 42*38fd1498Szrj 43*38fd1498Szrj namespace __gnu_parallel 44*38fd1498Szrj { 45*38fd1498Szrj /** 46*38fd1498Szrj * @brief Parallel std::find, switch for different algorithms. 47*38fd1498Szrj * @param __begin1 Begin iterator of first sequence. 48*38fd1498Szrj * @param __end1 End iterator of first sequence. 49*38fd1498Szrj * @param __begin2 Begin iterator of second sequence. Must have same 50*38fd1498Szrj * length as first sequence. 51*38fd1498Szrj * @param __pred Find predicate. 52*38fd1498Szrj * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...) 53*38fd1498Szrj * @return Place of finding in both sequences. 54*38fd1498Szrj */ 55*38fd1498Szrj template<typename _RAIter1, 56*38fd1498Szrj typename _RAIter2, 57*38fd1498Szrj typename _Pred, 58*38fd1498Szrj typename _Selector> 59*38fd1498Szrj inline std::pair<_RAIter1, _RAIter2> __find_template(_RAIter1 __begin1,_RAIter1 __end1,_RAIter2 __begin2,_Pred __pred,_Selector __selector)60*38fd1498Szrj __find_template(_RAIter1 __begin1, _RAIter1 __end1, 61*38fd1498Szrj _RAIter2 __begin2, _Pred __pred, _Selector __selector) 62*38fd1498Szrj { 63*38fd1498Szrj switch (_Settings::get().find_algorithm) 64*38fd1498Szrj { 65*38fd1498Szrj case GROWING_BLOCKS: 66*38fd1498Szrj return __find_template(__begin1, __end1, __begin2, __pred, 67*38fd1498Szrj __selector, growing_blocks_tag()); 68*38fd1498Szrj case CONSTANT_SIZE_BLOCKS: 69*38fd1498Szrj return __find_template(__begin1, __end1, __begin2, __pred, 70*38fd1498Szrj __selector, constant_size_blocks_tag()); 71*38fd1498Szrj case EQUAL_SPLIT: 72*38fd1498Szrj return __find_template(__begin1, __end1, __begin2, __pred, 73*38fd1498Szrj __selector, equal_split_tag()); 74*38fd1498Szrj default: 75*38fd1498Szrj _GLIBCXX_PARALLEL_ASSERT(false); 76*38fd1498Szrj return std::make_pair(__begin1, __begin2); 77*38fd1498Szrj } 78*38fd1498Szrj } 79*38fd1498Szrj 80*38fd1498Szrj #if _GLIBCXX_FIND_EQUAL_SPLIT 81*38fd1498Szrj 82*38fd1498Szrj /** 83*38fd1498Szrj * @brief Parallel std::find, equal splitting variant. 84*38fd1498Szrj * @param __begin1 Begin iterator of first sequence. 85*38fd1498Szrj * @param __end1 End iterator of first sequence. 86*38fd1498Szrj * @param __begin2 Begin iterator of second sequence. Second __sequence 87*38fd1498Szrj * must have same length as first sequence. 88*38fd1498Szrj * @param __pred Find predicate. 89*38fd1498Szrj * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...) 90*38fd1498Szrj * @return Place of finding in both sequences. 91*38fd1498Szrj */ 92*38fd1498Szrj template<typename _RAIter1, 93*38fd1498Szrj typename _RAIter2, 94*38fd1498Szrj typename _Pred, 95*38fd1498Szrj typename _Selector> 96*38fd1498Szrj std::pair<_RAIter1, _RAIter2> __find_template(_RAIter1 __begin1,_RAIter1 __end1,_RAIter2 __begin2,_Pred __pred,_Selector __selector,equal_split_tag)97*38fd1498Szrj __find_template(_RAIter1 __begin1, _RAIter1 __end1, 98*38fd1498Szrj _RAIter2 __begin2, _Pred __pred, 99*38fd1498Szrj _Selector __selector, equal_split_tag) 100*38fd1498Szrj { 101*38fd1498Szrj _GLIBCXX_CALL(__end1 - __begin1) 102*38fd1498Szrj 103*38fd1498Szrj typedef std::iterator_traits<_RAIter1> _TraitsType; 104*38fd1498Szrj typedef typename _TraitsType::difference_type _DifferenceType; 105*38fd1498Szrj typedef typename _TraitsType::value_type _ValueType; 106*38fd1498Szrj 107*38fd1498Szrj _DifferenceType __length = __end1 - __begin1; 108*38fd1498Szrj _DifferenceType __result = __length; 109*38fd1498Szrj _DifferenceType* __borders; 110*38fd1498Szrj 111*38fd1498Szrj omp_lock_t __result_lock; 112*38fd1498Szrj omp_init_lock(&__result_lock); 113*38fd1498Szrj 114*38fd1498Szrj _ThreadIndex __num_threads = __get_max_threads(); 115*38fd1498Szrj # pragma omp parallel num_threads(__num_threads) 116*38fd1498Szrj { 117*38fd1498Szrj # pragma omp single 118*38fd1498Szrj { 119*38fd1498Szrj __num_threads = omp_get_num_threads(); 120*38fd1498Szrj __borders = new _DifferenceType[__num_threads + 1]; 121*38fd1498Szrj __equally_split(__length, __num_threads, __borders); 122*38fd1498Szrj } //single 123*38fd1498Szrj 124*38fd1498Szrj _ThreadIndex __iam = omp_get_thread_num(); 125*38fd1498Szrj _DifferenceType __start = __borders[__iam], 126*38fd1498Szrj __stop = __borders[__iam + 1]; 127*38fd1498Szrj 128*38fd1498Szrj _RAIter1 __i1 = __begin1 + __start; 129*38fd1498Szrj _RAIter2 __i2 = __begin2 + __start; 130*38fd1498Szrj for (_DifferenceType __pos = __start; __pos < __stop; ++__pos) 131*38fd1498Szrj { 132*38fd1498Szrj # pragma omp flush(__result) 133*38fd1498Szrj // Result has been set to something lower. 134*38fd1498Szrj if (__result < __pos) 135*38fd1498Szrj break; 136*38fd1498Szrj 137*38fd1498Szrj if (__selector(__i1, __i2, __pred)) 138*38fd1498Szrj { 139*38fd1498Szrj omp_set_lock(&__result_lock); 140*38fd1498Szrj if (__pos < __result) 141*38fd1498Szrj __result = __pos; 142*38fd1498Szrj omp_unset_lock(&__result_lock); 143*38fd1498Szrj break; 144*38fd1498Szrj } 145*38fd1498Szrj ++__i1; 146*38fd1498Szrj ++__i2; 147*38fd1498Szrj } 148*38fd1498Szrj } //parallel 149*38fd1498Szrj 150*38fd1498Szrj omp_destroy_lock(&__result_lock); 151*38fd1498Szrj delete[] __borders; 152*38fd1498Szrj 153*38fd1498Szrj return std::pair<_RAIter1, _RAIter2>(__begin1 + __result, 154*38fd1498Szrj __begin2 + __result); 155*38fd1498Szrj } 156*38fd1498Szrj 157*38fd1498Szrj #endif 158*38fd1498Szrj 159*38fd1498Szrj #if _GLIBCXX_FIND_GROWING_BLOCKS 160*38fd1498Szrj 161*38fd1498Szrj /** 162*38fd1498Szrj * @brief Parallel std::find, growing block size variant. 163*38fd1498Szrj * @param __begin1 Begin iterator of first sequence. 164*38fd1498Szrj * @param __end1 End iterator of first sequence. 165*38fd1498Szrj * @param __begin2 Begin iterator of second sequence. Second __sequence 166*38fd1498Szrj * must have same length as first sequence. 167*38fd1498Szrj * @param __pred Find predicate. 168*38fd1498Szrj * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...) 169*38fd1498Szrj * @return Place of finding in both sequences. 170*38fd1498Szrj * @see __gnu_parallel::_Settings::find_sequential_search_size 171*38fd1498Szrj * @see __gnu_parallel::_Settings::find_scale_factor 172*38fd1498Szrj * 173*38fd1498Szrj * There are two main differences between the growing blocks and 174*38fd1498Szrj * the constant-size blocks variants. 175*38fd1498Szrj * 1. For GB, the block size grows; for CSB, the block size is fixed. 176*38fd1498Szrj * 2. For GB, the blocks are allocated dynamically; 177*38fd1498Szrj * for CSB, the blocks are allocated in a predetermined manner, 178*38fd1498Szrj * namely spacial round-robin. 179*38fd1498Szrj */ 180*38fd1498Szrj template<typename _RAIter1, 181*38fd1498Szrj typename _RAIter2, 182*38fd1498Szrj typename _Pred, 183*38fd1498Szrj typename _Selector> 184*38fd1498Szrj std::pair<_RAIter1, _RAIter2> __find_template(_RAIter1 __begin1,_RAIter1 __end1,_RAIter2 __begin2,_Pred __pred,_Selector __selector,growing_blocks_tag)185*38fd1498Szrj __find_template(_RAIter1 __begin1, _RAIter1 __end1, 186*38fd1498Szrj _RAIter2 __begin2, _Pred __pred, _Selector __selector, 187*38fd1498Szrj growing_blocks_tag) 188*38fd1498Szrj { 189*38fd1498Szrj _GLIBCXX_CALL(__end1 - __begin1) 190*38fd1498Szrj 191*38fd1498Szrj typedef std::iterator_traits<_RAIter1> _TraitsType; 192*38fd1498Szrj typedef typename _TraitsType::difference_type _DifferenceType; 193*38fd1498Szrj typedef typename _TraitsType::value_type _ValueType; 194*38fd1498Szrj 195*38fd1498Szrj const _Settings& __s = _Settings::get(); 196*38fd1498Szrj 197*38fd1498Szrj _DifferenceType __length = __end1 - __begin1; 198*38fd1498Szrj 199*38fd1498Szrj _DifferenceType 200*38fd1498Szrj __sequential_search_size = std::min<_DifferenceType> 201*38fd1498Szrj (__length, __s.find_sequential_search_size); 202*38fd1498Szrj 203*38fd1498Szrj // Try it sequentially first. 204*38fd1498Szrj std::pair<_RAIter1, _RAIter2> 205*38fd1498Szrj __find_seq_result = __selector._M_sequential_algorithm 206*38fd1498Szrj (__begin1, __begin1 + __sequential_search_size, 207*38fd1498Szrj __begin2, __pred); 208*38fd1498Szrj 209*38fd1498Szrj if (__find_seq_result.first != (__begin1 + __sequential_search_size)) 210*38fd1498Szrj return __find_seq_result; 211*38fd1498Szrj 212*38fd1498Szrj // Index of beginning of next free block (after sequential find). 213*38fd1498Szrj _DifferenceType __next_block_start = __sequential_search_size; 214*38fd1498Szrj _DifferenceType __result = __length; 215*38fd1498Szrj 216*38fd1498Szrj omp_lock_t __result_lock; 217*38fd1498Szrj omp_init_lock(&__result_lock); 218*38fd1498Szrj 219*38fd1498Szrj const float __scale_factor = __s.find_scale_factor; 220*38fd1498Szrj 221*38fd1498Szrj _ThreadIndex __num_threads = __get_max_threads(); 222*38fd1498Szrj # pragma omp parallel shared(__result) num_threads(__num_threads) 223*38fd1498Szrj { 224*38fd1498Szrj # pragma omp single 225*38fd1498Szrj __num_threads = omp_get_num_threads(); 226*38fd1498Szrj 227*38fd1498Szrj // Not within first __k elements -> start parallel. 228*38fd1498Szrj _ThreadIndex __iam = omp_get_thread_num(); 229*38fd1498Szrj 230*38fd1498Szrj _DifferenceType __block_size = 231*38fd1498Szrj std::max<_DifferenceType>(1, __scale_factor * __next_block_start); 232*38fd1498Szrj _DifferenceType __start = __fetch_and_add<_DifferenceType> 233*38fd1498Szrj (&__next_block_start, __block_size); 234*38fd1498Szrj 235*38fd1498Szrj // Get new block, update pointer to next block. 236*38fd1498Szrj _DifferenceType __stop = 237*38fd1498Szrj std::min<_DifferenceType>(__length, __start + __block_size); 238*38fd1498Szrj 239*38fd1498Szrj std::pair<_RAIter1, _RAIter2> __local_result; 240*38fd1498Szrj 241*38fd1498Szrj while (__start < __length) 242*38fd1498Szrj { 243*38fd1498Szrj # pragma omp flush(__result) 244*38fd1498Szrj // Get new value of result. 245*38fd1498Szrj if (__result < __start) 246*38fd1498Szrj { 247*38fd1498Szrj // No chance to find first element. 248*38fd1498Szrj break; 249*38fd1498Szrj } 250*38fd1498Szrj 251*38fd1498Szrj __local_result = __selector._M_sequential_algorithm 252*38fd1498Szrj (__begin1 + __start, __begin1 + __stop, 253*38fd1498Szrj __begin2 + __start, __pred); 254*38fd1498Szrj 255*38fd1498Szrj if (__local_result.first != (__begin1 + __stop)) 256*38fd1498Szrj { 257*38fd1498Szrj omp_set_lock(&__result_lock); 258*38fd1498Szrj if ((__local_result.first - __begin1) < __result) 259*38fd1498Szrj { 260*38fd1498Szrj __result = __local_result.first - __begin1; 261*38fd1498Szrj 262*38fd1498Szrj // Result cannot be in future blocks, stop algorithm. 263*38fd1498Szrj __fetch_and_add<_DifferenceType>(&__next_block_start, 264*38fd1498Szrj __length); 265*38fd1498Szrj } 266*38fd1498Szrj omp_unset_lock(&__result_lock); 267*38fd1498Szrj } 268*38fd1498Szrj 269*38fd1498Szrj _DifferenceType __block_size = 270*38fd1498Szrj std::max<_DifferenceType>(1, __scale_factor * __next_block_start); 271*38fd1498Szrj 272*38fd1498Szrj // Get new block, update pointer to next block. 273*38fd1498Szrj __start = __fetch_and_add<_DifferenceType>(&__next_block_start, 274*38fd1498Szrj __block_size); 275*38fd1498Szrj __stop = 276*38fd1498Szrj std::min<_DifferenceType>(__length, __start + __block_size); 277*38fd1498Szrj } 278*38fd1498Szrj } //parallel 279*38fd1498Szrj 280*38fd1498Szrj omp_destroy_lock(&__result_lock); 281*38fd1498Szrj 282*38fd1498Szrj // Return iterator on found element. 283*38fd1498Szrj return 284*38fd1498Szrj std::pair<_RAIter1, _RAIter2>(__begin1 + __result, 285*38fd1498Szrj __begin2 + __result); 286*38fd1498Szrj } 287*38fd1498Szrj 288*38fd1498Szrj #endif 289*38fd1498Szrj 290*38fd1498Szrj #if _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS 291*38fd1498Szrj 292*38fd1498Szrj /** 293*38fd1498Szrj * @brief Parallel std::find, constant block size variant. 294*38fd1498Szrj * @param __begin1 Begin iterator of first sequence. 295*38fd1498Szrj * @param __end1 End iterator of first sequence. 296*38fd1498Szrj * @param __begin2 Begin iterator of second sequence. Second __sequence 297*38fd1498Szrj * must have same length as first sequence. 298*38fd1498Szrj * @param __pred Find predicate. 299*38fd1498Szrj * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...) 300*38fd1498Szrj * @return Place of finding in both sequences. 301*38fd1498Szrj * @see __gnu_parallel::_Settings::find_sequential_search_size 302*38fd1498Szrj * @see __gnu_parallel::_Settings::find_block_size 303*38fd1498Szrj * There are two main differences between the growing blocks and the 304*38fd1498Szrj * constant-size blocks variants. 305*38fd1498Szrj * 1. For GB, the block size grows; for CSB, the block size is fixed. 306*38fd1498Szrj * 2. For GB, the blocks are allocated dynamically; for CSB, the 307*38fd1498Szrj * blocks are allocated in a predetermined manner, namely spacial 308*38fd1498Szrj * round-robin. 309*38fd1498Szrj */ 310*38fd1498Szrj template<typename _RAIter1, 311*38fd1498Szrj typename _RAIter2, 312*38fd1498Szrj typename _Pred, 313*38fd1498Szrj typename _Selector> 314*38fd1498Szrj std::pair<_RAIter1, _RAIter2> __find_template(_RAIter1 __begin1,_RAIter1 __end1,_RAIter2 __begin2,_Pred __pred,_Selector __selector,constant_size_blocks_tag)315*38fd1498Szrj __find_template(_RAIter1 __begin1, _RAIter1 __end1, 316*38fd1498Szrj _RAIter2 __begin2, _Pred __pred, _Selector __selector, 317*38fd1498Szrj constant_size_blocks_tag) 318*38fd1498Szrj { 319*38fd1498Szrj _GLIBCXX_CALL(__end1 - __begin1) 320*38fd1498Szrj typedef std::iterator_traits<_RAIter1> _TraitsType; 321*38fd1498Szrj typedef typename _TraitsType::difference_type _DifferenceType; 322*38fd1498Szrj typedef typename _TraitsType::value_type _ValueType; 323*38fd1498Szrj 324*38fd1498Szrj const _Settings& __s = _Settings::get(); 325*38fd1498Szrj 326*38fd1498Szrj _DifferenceType __length = __end1 - __begin1; 327*38fd1498Szrj 328*38fd1498Szrj _DifferenceType __sequential_search_size = std::min<_DifferenceType> 329*38fd1498Szrj (__length, __s.find_sequential_search_size); 330*38fd1498Szrj 331*38fd1498Szrj // Try it sequentially first. 332*38fd1498Szrj std::pair<_RAIter1, _RAIter2> 333*38fd1498Szrj __find_seq_result = __selector._M_sequential_algorithm 334*38fd1498Szrj (__begin1, __begin1 + __sequential_search_size, __begin2, __pred); 335*38fd1498Szrj 336*38fd1498Szrj if (__find_seq_result.first != (__begin1 + __sequential_search_size)) 337*38fd1498Szrj return __find_seq_result; 338*38fd1498Szrj 339*38fd1498Szrj _DifferenceType __result = __length; 340*38fd1498Szrj omp_lock_t __result_lock; 341*38fd1498Szrj omp_init_lock(&__result_lock); 342*38fd1498Szrj 343*38fd1498Szrj // Not within first __sequential_search_size elements -> start parallel. 344*38fd1498Szrj 345*38fd1498Szrj _ThreadIndex __num_threads = __get_max_threads(); 346*38fd1498Szrj # pragma omp parallel shared(__result) num_threads(__num_threads) 347*38fd1498Szrj { 348*38fd1498Szrj # pragma omp single 349*38fd1498Szrj __num_threads = omp_get_num_threads(); 350*38fd1498Szrj 351*38fd1498Szrj _ThreadIndex __iam = omp_get_thread_num(); 352*38fd1498Szrj _DifferenceType __block_size = __s.find_initial_block_size; 353*38fd1498Szrj 354*38fd1498Szrj // First element of thread's current iteration. 355*38fd1498Szrj _DifferenceType __iteration_start = __sequential_search_size; 356*38fd1498Szrj 357*38fd1498Szrj // Where to work (initialization). 358*38fd1498Szrj _DifferenceType __start = __iteration_start + __iam * __block_size; 359*38fd1498Szrj _DifferenceType __stop = std::min<_DifferenceType>(__length, 360*38fd1498Szrj __start 361*38fd1498Szrj + __block_size); 362*38fd1498Szrj 363*38fd1498Szrj std::pair<_RAIter1, _RAIter2> __local_result; 364*38fd1498Szrj 365*38fd1498Szrj while (__start < __length) 366*38fd1498Szrj { 367*38fd1498Szrj // Get new value of result. 368*38fd1498Szrj # pragma omp flush(__result) 369*38fd1498Szrj // No chance to find first element. 370*38fd1498Szrj if (__result < __start) 371*38fd1498Szrj break; 372*38fd1498Szrj 373*38fd1498Szrj __local_result = __selector._M_sequential_algorithm 374*38fd1498Szrj (__begin1 + __start, __begin1 + __stop, 375*38fd1498Szrj __begin2 + __start, __pred); 376*38fd1498Szrj 377*38fd1498Szrj if (__local_result.first != (__begin1 + __stop)) 378*38fd1498Szrj { 379*38fd1498Szrj omp_set_lock(&__result_lock); 380*38fd1498Szrj if ((__local_result.first - __begin1) < __result) 381*38fd1498Szrj __result = __local_result.first - __begin1; 382*38fd1498Szrj omp_unset_lock(&__result_lock); 383*38fd1498Szrj // Will not find better value in its interval. 384*38fd1498Szrj break; 385*38fd1498Szrj } 386*38fd1498Szrj 387*38fd1498Szrj __iteration_start += __num_threads * __block_size; 388*38fd1498Szrj 389*38fd1498Szrj // Where to work. 390*38fd1498Szrj __start = __iteration_start + __iam * __block_size; 391*38fd1498Szrj __stop = std::min<_DifferenceType>(__length, 392*38fd1498Szrj __start + __block_size); 393*38fd1498Szrj } 394*38fd1498Szrj } //parallel 395*38fd1498Szrj 396*38fd1498Szrj omp_destroy_lock(&__result_lock); 397*38fd1498Szrj 398*38fd1498Szrj // Return iterator on found element. 399*38fd1498Szrj return std::pair<_RAIter1, _RAIter2>(__begin1 + __result, 400*38fd1498Szrj __begin2 + __result); 401*38fd1498Szrj } 402*38fd1498Szrj #endif 403*38fd1498Szrj } // end namespace 404*38fd1498Szrj 405*38fd1498Szrj #endif /* _GLIBCXX_PARALLEL_FIND_H */ 406