1 /* -------------------------------------------------------------------------- *
2  *                       Simbody(tm): SimTKcommon                             *
3  * -------------------------------------------------------------------------- *
4  * This is part of the SimTK biosimulation toolkit originating from           *
5  * Simbios, the NIH National Center for Physics-Based Simulation of           *
6  * Biological Structures at Stanford, funded under the NIH Roadmap for        *
7  * Medical Research, grant U54 GM072970. See https://simtk.org/home/simbody.  *
8  *                                                                            *
9  * Portions copyright (c) 2008-12 Stanford University and the Authors.        *
10  * Authors: Peter Eastman                                                     *
11  * Contributors:                                                              *
12  *                                                                            *
13  * Licensed under the Apache License, Version 2.0 (the "License"); you may    *
14  * not use this file except in compliance with the License. You may obtain a  *
15  * copy of the License at http://www.apache.org/licenses/LICENSE-2.0.         *
16  *                                                                            *
17  * Unless required by applicable law or agreed to in writing, software        *
18  * distributed under the License is distributed on an "AS IS" BASIS,          *
19  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   *
20  * See the License for the specific language governing permissions and        *
21  * limitations under the License.                                             *
22  * -------------------------------------------------------------------------- */
23 
24 #include "Parallel2DExecutorImpl.h"
25 #include "SimTKcommon/internal/ParallelExecutor.h"
26 #include <utility>
27 
28 using std::pair;
29 
30 namespace SimTK {
31 
Parallel2DExecutorImpl(int gridSize,int numProcessors)32 Parallel2DExecutorImpl::Parallel2DExecutorImpl(int gridSize, int numProcessors) : gridSize(gridSize), ownExecutor(true) {
33     numProcessors = std::min(numProcessors, gridSize/2);
34     if (numProcessors < 2)
35         executor = 0;
36     else
37         executor = new ParallelExecutor(numProcessors);
38     init(numProcessors);
39 }
Parallel2DExecutorImpl(int gridSize,ParallelExecutor & executor)40 Parallel2DExecutorImpl::Parallel2DExecutorImpl(int gridSize, ParallelExecutor& executor) : gridSize(gridSize), executor(&executor), ownExecutor(false) {
41     init(executor.getNumProcessors());
42 }
init(int numProcessors)43 void Parallel2DExecutorImpl::init(int numProcessors) {
44     int bins;
45     if (numProcessors < 2) {
46         bins = 1;
47     }
48     else {
49 
50         // Determine how many levels of subdivision to use.
51 
52         int levels = 1;
53         while (1<<levels < numProcessors)
54             levels++;
55         levels++;
56         bins = 1<<levels;
57 
58         // Build the set of squares.
59 
60         squares.resize(bins-1);
61         addTriangle(0, 0, 0, levels);
62     }
63 
64     // Find the range of indices in each bin.
65 
66     binStart.resize(bins+1);
67     for (int i = 0; i < bins; ++i)
68         binStart[i] = (int) std::floor(0.5+i*gridSize/(double) bins);
69     binStart[bins] = gridSize;
70 }
~Parallel2DExecutorImpl()71 Parallel2DExecutorImpl::~Parallel2DExecutorImpl() {
72     if (executor != 0 && ownExecutor)
73         delete executor;
74 }
addSquare(int x,int y,int pass,int level)75 void Parallel2DExecutorImpl::addSquare(int x, int y, int pass, int level) {
76     if (level == 0)
77         squares[pass-1].push_back(pair<int, int>(x, y));
78     else {
79         addSquare(2*x+0, 2*y+1, 2*pass+1, level-1);
80         addSquare(2*x+1, 2*y+2, 2*pass+1, level-1);
81         addSquare(2*x+0, 2*y+2, 2*pass+2, level-1);
82         addSquare(2*x+1, 2*y+1, 2*pass+2, level-1);
83     }
84 }
addTriangle(int x,int y,int pass,int level)85 void Parallel2DExecutorImpl::addTriangle(int x, int y, int pass, int level) {
86     if (level > 1) {
87         addSquare(2*x, 2*y, 2*pass, level-1);
88         addTriangle(2*x, 2*y, 2*pass, level-1);
89         addTriangle(2*x+1, 2*y+1, 2*pass, level-1);
90     }
91 }
clone() const92 Parallel2DExecutorImpl* Parallel2DExecutorImpl::clone() const {
93     if (ownExecutor)
94         return new Parallel2DExecutorImpl(gridSize, executor->getNumProcessors());
95     return new Parallel2DExecutorImpl(gridSize, *executor);
96 }
getExecutor()97 ParallelExecutor& Parallel2DExecutorImpl::getExecutor() {
98     return *executor;
99 }
100 
101 class Parallel2DExecutorImpl::TriangleTask : public ParallelExecutor::Task {
102 public:
TriangleTask(const Parallel2DExecutorImpl & executor,Parallel2DExecutor::Task & task,Parallel2DExecutor::RangeType rangeType,int width,bool shouldInitialize,bool shouldFinish)103     TriangleTask(const Parallel2DExecutorImpl& executor, Parallel2DExecutor::Task& task, Parallel2DExecutor::RangeType rangeType, int width, bool shouldInitialize, bool shouldFinish) :
104         executor(executor), task(task), rangeType(rangeType), width(width), shouldInitialize(shouldInitialize), shouldFinish(shouldFinish) {
105     }
execute(int index)106     void execute(int index) override {
107         int start = executor.getBinStart(width*index);
108         int end = executor.getBinStart(width*(index+1));
109         switch (rangeType) {
110         case Parallel2DExecutor::FullMatrix:
111             for (int i = start; i < end; ++i)
112                 for (int j = start; j < end; ++j)
113                     task.execute(i, j);
114             return;
115         case Parallel2DExecutor::HalfMatrix:
116             for (int i = start; i < end; ++i)
117                 for (int j = start; j < i; ++j)
118                     task.execute(i, j);
119             return;
120         case Parallel2DExecutor::HalfPlusDiagonal:
121             for (int i = start; i < end; ++i)
122                 for (int j = start; j <= i; ++j)
123                     task.execute(i, j);
124             return;
125         }
126     }
initialize()127     void initialize() override {
128         if (shouldInitialize)
129             task.initialize();
130     }
finish()131     void finish() override {
132         if (shouldFinish)
133             task.finish();
134     }
135 private:
136     const Parallel2DExecutorImpl& executor;
137     Parallel2DExecutor::Task& task;
138     const Parallel2DExecutor::RangeType rangeType;
139     const int width;
140     const bool shouldInitialize, shouldFinish;
141 };
142 
143 class Parallel2DExecutorImpl::SquareTask : public ParallelExecutor::Task {
144 public:
SquareTask(const Parallel2DExecutorImpl & executor,Parallel2DExecutor::Task & task,const Array_<pair<int,int>> & squares,Parallel2DExecutor::RangeType rangeType,bool shouldInitialize,bool shouldFinish)145     SquareTask(const Parallel2DExecutorImpl& executor, Parallel2DExecutor::Task& task, const Array_<pair<int,int> >& squares, Parallel2DExecutor::RangeType rangeType, bool shouldInitialize, bool shouldFinish) :
146         executor(executor), task(task), squares(squares), rangeType(rangeType), shouldInitialize(shouldInitialize), shouldFinish(shouldFinish) {
147     }
execute(int index)148     void execute(int index) override {
149         const pair<int,int>& square = squares[index];
150         int istart = executor.getBinStart(square.second+1);
151         int iend = executor.getBinStart(square.second+2);
152         int jstart = executor.getBinStart(square.first);
153         int jend = executor.getBinStart(square.first+1);
154         switch (rangeType) {
155         case Parallel2DExecutor::FullMatrix:
156             for (int i = istart; i < iend; ++i)
157                 for (int j = jstart; j < jend; ++j) {
158                     task.execute(i, j);
159                     task.execute(j, i);
160                 }
161             return;
162         case Parallel2DExecutor::HalfMatrix:
163         case Parallel2DExecutor::HalfPlusDiagonal:
164             for (int i = istart; i < iend; ++i)
165                 for (int j = jstart; j < jend; ++j)
166                     task.execute(i, j);
167             return;
168         }
169     }
initialize()170     void initialize() override {
171         if (shouldInitialize)
172             task.initialize();
173     }
finish()174     void finish() override {
175         if (shouldFinish)
176             task.finish();
177     }
178 private:
179     const Parallel2DExecutorImpl& executor;
180     Parallel2DExecutor::Task& task;
181     const Array_<pair<int,int> >& squares;
182     const Parallel2DExecutor::RangeType rangeType;
183     bool shouldInitialize, shouldFinish;
184 };
185 
execute(Parallel2DExecutor::Task & task,Parallel2DExecutor::RangeType rangeType)186 void Parallel2DExecutorImpl::execute(Parallel2DExecutor::Task& task, Parallel2DExecutor::RangeType rangeType) {
187     if (executor == 0) {
188         task.initialize();
189         TriangleTask(*this, task, rangeType, 1, false, false).execute(0);
190         task.finish();
191         return;
192     }
193     int bins = binStart.size()-1;
194 
195     // Execute the blocks along the diagonal.
196 
197     TriangleTask triangle(*this, task, rangeType, 2, true, false);
198     executor->execute(triangle, bins/2);
199 
200     // Execute the square blocks in a series of passes.
201 
202     for (int i = 0; i < (int)squares.size(); ++i) {
203         SquareTask square(*this, task, squares[i], rangeType, false,
204                           i == (int)squares.size()-1);
205         executor->execute(square, squares[i].size());
206     }
207 }
208 
Parallel2DExecutor(int gridSize,int numProcessors)209 Parallel2DExecutor::Parallel2DExecutor(int gridSize, int numProcessors) : HandleBase(new Parallel2DExecutorImpl(gridSize, numProcessors)) {
210 }
211 
Parallel2DExecutor(int gridSize,ParallelExecutor & executor)212 Parallel2DExecutor::Parallel2DExecutor(int gridSize, ParallelExecutor& executor) : HandleBase(new Parallel2DExecutorImpl(gridSize, executor)) {
213 }
214 
execute(Task & task,RangeType rangeType)215 void Parallel2DExecutor::execute(Task& task, RangeType rangeType) {
216     updImpl().execute(task, rangeType);
217 }
218 
getExecutor()219 ParallelExecutor& Parallel2DExecutor::getExecutor() {
220     return updImpl().getExecutor();
221 }
222 
223 } // namespace SimTK
224