1 /* -------------------------------------------------------------------------- *
2 * Simbody(tm): SimTKcommon *
3 * -------------------------------------------------------------------------- *
4 * This is part of the SimTK biosimulation toolkit originating from *
5 * Simbios, the NIH National Center for Physics-Based Simulation of *
6 * Biological Structures at Stanford, funded under the NIH Roadmap for *
7 * Medical Research, grant U54 GM072970. See https://simtk.org/home/simbody. *
8 * *
9 * Portions copyright (c) 2008-12 Stanford University and the Authors. *
10 * Authors: Peter Eastman *
11 * Contributors: *
12 * *
13 * Licensed under the Apache License, Version 2.0 (the "License"); you may *
14 * not use this file except in compliance with the License. You may obtain a *
15 * copy of the License at http://www.apache.org/licenses/LICENSE-2.0. *
16 * *
17 * Unless required by applicable law or agreed to in writing, software *
18 * distributed under the License is distributed on an "AS IS" BASIS, *
19 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
20 * See the License for the specific language governing permissions and *
21 * limitations under the License. *
22 * -------------------------------------------------------------------------- */
23
24 #include "Parallel2DExecutorImpl.h"
25 #include "SimTKcommon/internal/ParallelExecutor.h"
26 #include <utility>
27
28 using std::pair;
29
30 namespace SimTK {
31
Parallel2DExecutorImpl(int gridSize,int numProcessors)32 Parallel2DExecutorImpl::Parallel2DExecutorImpl(int gridSize, int numProcessors) : gridSize(gridSize), ownExecutor(true) {
33 numProcessors = std::min(numProcessors, gridSize/2);
34 if (numProcessors < 2)
35 executor = 0;
36 else
37 executor = new ParallelExecutor(numProcessors);
38 init(numProcessors);
39 }
Parallel2DExecutorImpl(int gridSize,ParallelExecutor & executor)40 Parallel2DExecutorImpl::Parallel2DExecutorImpl(int gridSize, ParallelExecutor& executor) : gridSize(gridSize), executor(&executor), ownExecutor(false) {
41 init(executor.getNumProcessors());
42 }
init(int numProcessors)43 void Parallel2DExecutorImpl::init(int numProcessors) {
44 int bins;
45 if (numProcessors < 2) {
46 bins = 1;
47 }
48 else {
49
50 // Determine how many levels of subdivision to use.
51
52 int levels = 1;
53 while (1<<levels < numProcessors)
54 levels++;
55 levels++;
56 bins = 1<<levels;
57
58 // Build the set of squares.
59
60 squares.resize(bins-1);
61 addTriangle(0, 0, 0, levels);
62 }
63
64 // Find the range of indices in each bin.
65
66 binStart.resize(bins+1);
67 for (int i = 0; i < bins; ++i)
68 binStart[i] = (int) std::floor(0.5+i*gridSize/(double) bins);
69 binStart[bins] = gridSize;
70 }
~Parallel2DExecutorImpl()71 Parallel2DExecutorImpl::~Parallel2DExecutorImpl() {
72 if (executor != 0 && ownExecutor)
73 delete executor;
74 }
addSquare(int x,int y,int pass,int level)75 void Parallel2DExecutorImpl::addSquare(int x, int y, int pass, int level) {
76 if (level == 0)
77 squares[pass-1].push_back(pair<int, int>(x, y));
78 else {
79 addSquare(2*x+0, 2*y+1, 2*pass+1, level-1);
80 addSquare(2*x+1, 2*y+2, 2*pass+1, level-1);
81 addSquare(2*x+0, 2*y+2, 2*pass+2, level-1);
82 addSquare(2*x+1, 2*y+1, 2*pass+2, level-1);
83 }
84 }
addTriangle(int x,int y,int pass,int level)85 void Parallel2DExecutorImpl::addTriangle(int x, int y, int pass, int level) {
86 if (level > 1) {
87 addSquare(2*x, 2*y, 2*pass, level-1);
88 addTriangle(2*x, 2*y, 2*pass, level-1);
89 addTriangle(2*x+1, 2*y+1, 2*pass, level-1);
90 }
91 }
clone() const92 Parallel2DExecutorImpl* Parallel2DExecutorImpl::clone() const {
93 if (ownExecutor)
94 return new Parallel2DExecutorImpl(gridSize, executor->getNumProcessors());
95 return new Parallel2DExecutorImpl(gridSize, *executor);
96 }
getExecutor()97 ParallelExecutor& Parallel2DExecutorImpl::getExecutor() {
98 return *executor;
99 }
100
101 class Parallel2DExecutorImpl::TriangleTask : public ParallelExecutor::Task {
102 public:
TriangleTask(const Parallel2DExecutorImpl & executor,Parallel2DExecutor::Task & task,Parallel2DExecutor::RangeType rangeType,int width,bool shouldInitialize,bool shouldFinish)103 TriangleTask(const Parallel2DExecutorImpl& executor, Parallel2DExecutor::Task& task, Parallel2DExecutor::RangeType rangeType, int width, bool shouldInitialize, bool shouldFinish) :
104 executor(executor), task(task), rangeType(rangeType), width(width), shouldInitialize(shouldInitialize), shouldFinish(shouldFinish) {
105 }
execute(int index)106 void execute(int index) override {
107 int start = executor.getBinStart(width*index);
108 int end = executor.getBinStart(width*(index+1));
109 switch (rangeType) {
110 case Parallel2DExecutor::FullMatrix:
111 for (int i = start; i < end; ++i)
112 for (int j = start; j < end; ++j)
113 task.execute(i, j);
114 return;
115 case Parallel2DExecutor::HalfMatrix:
116 for (int i = start; i < end; ++i)
117 for (int j = start; j < i; ++j)
118 task.execute(i, j);
119 return;
120 case Parallel2DExecutor::HalfPlusDiagonal:
121 for (int i = start; i < end; ++i)
122 for (int j = start; j <= i; ++j)
123 task.execute(i, j);
124 return;
125 }
126 }
initialize()127 void initialize() override {
128 if (shouldInitialize)
129 task.initialize();
130 }
finish()131 void finish() override {
132 if (shouldFinish)
133 task.finish();
134 }
135 private:
136 const Parallel2DExecutorImpl& executor;
137 Parallel2DExecutor::Task& task;
138 const Parallel2DExecutor::RangeType rangeType;
139 const int width;
140 const bool shouldInitialize, shouldFinish;
141 };
142
143 class Parallel2DExecutorImpl::SquareTask : public ParallelExecutor::Task {
144 public:
SquareTask(const Parallel2DExecutorImpl & executor,Parallel2DExecutor::Task & task,const Array_<pair<int,int>> & squares,Parallel2DExecutor::RangeType rangeType,bool shouldInitialize,bool shouldFinish)145 SquareTask(const Parallel2DExecutorImpl& executor, Parallel2DExecutor::Task& task, const Array_<pair<int,int> >& squares, Parallel2DExecutor::RangeType rangeType, bool shouldInitialize, bool shouldFinish) :
146 executor(executor), task(task), squares(squares), rangeType(rangeType), shouldInitialize(shouldInitialize), shouldFinish(shouldFinish) {
147 }
execute(int index)148 void execute(int index) override {
149 const pair<int,int>& square = squares[index];
150 int istart = executor.getBinStart(square.second+1);
151 int iend = executor.getBinStart(square.second+2);
152 int jstart = executor.getBinStart(square.first);
153 int jend = executor.getBinStart(square.first+1);
154 switch (rangeType) {
155 case Parallel2DExecutor::FullMatrix:
156 for (int i = istart; i < iend; ++i)
157 for (int j = jstart; j < jend; ++j) {
158 task.execute(i, j);
159 task.execute(j, i);
160 }
161 return;
162 case Parallel2DExecutor::HalfMatrix:
163 case Parallel2DExecutor::HalfPlusDiagonal:
164 for (int i = istart; i < iend; ++i)
165 for (int j = jstart; j < jend; ++j)
166 task.execute(i, j);
167 return;
168 }
169 }
initialize()170 void initialize() override {
171 if (shouldInitialize)
172 task.initialize();
173 }
finish()174 void finish() override {
175 if (shouldFinish)
176 task.finish();
177 }
178 private:
179 const Parallel2DExecutorImpl& executor;
180 Parallel2DExecutor::Task& task;
181 const Array_<pair<int,int> >& squares;
182 const Parallel2DExecutor::RangeType rangeType;
183 bool shouldInitialize, shouldFinish;
184 };
185
execute(Parallel2DExecutor::Task & task,Parallel2DExecutor::RangeType rangeType)186 void Parallel2DExecutorImpl::execute(Parallel2DExecutor::Task& task, Parallel2DExecutor::RangeType rangeType) {
187 if (executor == 0) {
188 task.initialize();
189 TriangleTask(*this, task, rangeType, 1, false, false).execute(0);
190 task.finish();
191 return;
192 }
193 int bins = binStart.size()-1;
194
195 // Execute the blocks along the diagonal.
196
197 TriangleTask triangle(*this, task, rangeType, 2, true, false);
198 executor->execute(triangle, bins/2);
199
200 // Execute the square blocks in a series of passes.
201
202 for (int i = 0; i < (int)squares.size(); ++i) {
203 SquareTask square(*this, task, squares[i], rangeType, false,
204 i == (int)squares.size()-1);
205 executor->execute(square, squares[i].size());
206 }
207 }
208
Parallel2DExecutor(int gridSize,int numProcessors)209 Parallel2DExecutor::Parallel2DExecutor(int gridSize, int numProcessors) : HandleBase(new Parallel2DExecutorImpl(gridSize, numProcessors)) {
210 }
211
Parallel2DExecutor(int gridSize,ParallelExecutor & executor)212 Parallel2DExecutor::Parallel2DExecutor(int gridSize, ParallelExecutor& executor) : HandleBase(new Parallel2DExecutorImpl(gridSize, executor)) {
213 }
214
execute(Task & task,RangeType rangeType)215 void Parallel2DExecutor::execute(Task& task, RangeType rangeType) {
216 updImpl().execute(task, rangeType);
217 }
218
getExecutor()219 ParallelExecutor& Parallel2DExecutor::getExecutor() {
220 return updImpl().getExecutor();
221 }
222
223 } // namespace SimTK
224