1 /**
2 * Mandelbulber v2, a 3D fractal generator ,=#MKNmMMKmmßMNWy,
3 * ,B" ]L,,p%%%,,,§;, "K
4 * Copyright (C) 2017-20 Mandelbulber Team §R-==%w["'~5]m%=L.=~5N
5 * ,=mm=§M ]=4 yJKA"/-Nsaj "Bw,==,,
6 * This file is part of Mandelbulber. §R.r= jw",M Km .mM FW ",§=ß., ,TN
7 * ,4R =%["w[N=7]J '"5=],""]]M,w,-; T=]M
8 * Mandelbulber is free software: §R.ß~-Q/M=,=5"v"]=Qf,'§"M= =,M.§ Rz]M"Kw
9 * you can redistribute it and/or §w "xDY.J ' -"m=====WeC=\ ""%""y=%"]"" §
10 * modify it under the terms of the "§M=M =D=4"N #"%==A%p M§ M6 R' #"=~.4M
11 * GNU General Public License as §W =, ][T"]C § § '§ e===~ U !§[Z ]N
12 * published by the 4M",,Jm=,"=e~ § § j]]""N BmM"py=ßM
13 * Free Software Foundation, ]§ T,M=& 'YmMMpM9MMM%=w=,,=MT]M m§;'§,
14 * either version 3 of the License, TWw [.j"5=~N[=§%=%W,T ]R,"=="Y[LFT ]N
15 * or (at your option) TW=,-#"%=;[ =Q:["V"" ],,M.m == ]N
16 * any later version. J§"mr"] ,=,," =="""J]= M"M"]==ß"
17 * §= "=C=4 §"eM "=B:m|4"]#F,§~
18 * Mandelbulber is distributed in "9w=,,]w em%wJ '"~" ,=,,ß"
19 * the hope that it will be useful, . "K= ,=RMMMßM"""
20 * but WITHOUT ANY WARRANTY; .'''
21 * without even the implied warranty
22 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
23 *
24 * See the GNU General Public License for more details.
25 * You should have received a copy of the GNU General Public License
26 * along with Mandelbulber. If not, see <http://www.gnu.org/licenses/>.
27 *
28 * ###########################################################################
29 *
30 * Authors: Krzysztof Marczak (buddhi1980@gmail.com), Robert Pancoast (RobertPancoast77@gmail.com)
31 *
32 * Created on: 3 maj 2017
33 * Author: krzysztof
34 */
35
36 #include "opencl_engine.h"
37
38 #include <iostream>
39 #include <memory>
40 #include <sstream>
41
42 #include <QCryptographicHash>
43 #include <QDebug>
44 #include <QDir>
45 #include <QElapsedTimer>
46
47 #include "error_message.hpp"
48 #include "opencl_hardware.h"
49 #include "parameters.hpp"
50 #include "write_log.hpp"
51
cOpenClEngine(cOpenClHardware * _hardware)52 cOpenClEngine::cOpenClEngine(cOpenClHardware *_hardware) : QObject(_hardware), hardware(_hardware)
53 {
54 #ifdef USE_OPENCL
55 programsLoaded = false;
56 readyForRendering = false;
57 kernelCreated = false;
58 locked = false;
59 useBuildCache = true;
60 useFastRelaxedMath = false;
61
62 clKernels.append(std::shared_ptr<cl::Kernel>());
63 clQueues.append(std::shared_ptr<cl::CommandQueue>());
64 outputBuffers.append(listOfBuffers());
65 inputAndOutputBuffers.append(listOfBuffers());
66 inputBuffers.append(listOfBuffers());
67
68 #endif
69
70 connect(this, SIGNAL(showErrorMessage(QString, cErrorMessage::enumMessageType, QWidget *)),
71 gErrorMessage, SLOT(slotShowMessage(QString, cErrorMessage::enumMessageType, QWidget *)));
72 }
73
~cOpenClEngine()74 cOpenClEngine::~cOpenClEngine()
75 {
76 #ifdef USE_OPENCL
77 #endif
78 }
79
80 #ifdef USE_OPENCL
81
checkErr(cl_int err,QString functionName)82 bool cOpenClEngine::checkErr(cl_int err, QString functionName)
83 {
84 if (err != CL_SUCCESS)
85 {
86 qCritical() << "OpenCl ERROR: " << functionName << " (" << err << ")";
87 return false;
88 }
89 else
90 return true;
91 }
92
Build(const QByteArray & programString,QString * errorText,bool quiet)93 bool cOpenClEngine::Build(const QByteArray &programString, QString *errorText, bool quiet)
94 {
95 if (hardware->getClDevices(0).size() > 0 && hardware->getEnabledDevices().size() > 0)
96 {
97 // calculating hash code of the program
98 QCryptographicHash hashCryptProgram(QCryptographicHash::Md4);
99 hashCryptProgram.addData(programString);
100 // recompile also if selected devices changed
101 for (int d = 0; d < hardware->getEnabledDevices().size(); d++)
102 {
103 hashCryptProgram.addData(
104 reinterpret_cast<char *>(&hardware->getSelectedDevicesIndices()[d]), sizeof(int));
105 }
106 QByteArray hashProgram = hashCryptProgram.result();
107
108 // calculating hash code of build parameters
109 QCryptographicHash hashCryptBuildParams(QCryptographicHash::Md4);
110 hashCryptBuildParams.addData(definesCollector.toLocal8Bit());
111 QByteArray hashBuildParams = hashCryptBuildParams.result();
112
113 definesCollector += " -DCODEHASH=" + QString(hashProgram.toHex());
114
115 if (!useBuildCache) DeleteKernelCache();
116
117 // if program is different than in previous run
118 if (!(hashProgram == lastProgramHash && hashBuildParams == lastBuildParametersHash
119 && useBuildCache))
120 {
121 lastBuildParametersHash = hashBuildParams;
122 lastProgramHash = hashProgram;
123
124 // collecting all parts of program
125 cl::Program::Sources sources;
126 sources.emplace_back(programString.constData(), size_t(programString.length()));
127
128 // creating cl::Program
129 cl_int err = 0;
130
131 // Creates a program from source strings and Context.
132 // Context initialized with support for multiple devices.
133 // Therefore cl::Program initialized with device vector
134 // Does not compile or link the program.
135
136 clPrograms.clear();
137 for (int d = 0; d < hardware->getEnabledDevices().size(); d++)
138 {
139 clPrograms.append(
140 std::shared_ptr<cl::Program>(new cl::Program(*hardware->getContext(d), sources, &err)));
141 }
142
143 if (checkErr(err, "cl::Program()"))
144 {
145 std::string buildParams =
146 "-w -cl-single-precision-constant -cl-denorms-are-zero -cl-mad-enable";
147
148 if (useFastRelaxedMath) buildParams += " -cl-fast-relaxed-math";
149
150 buildParams.append(" -DOPENCL_KERNEL_CODE");
151
152 buildParams += definesCollector.toUtf8().constData();
153
154 WriteLogString("Build parameters", buildParams.c_str(), 2);
155
156 // cl::Program::Build (compiles and links) a multi-device program executable
157 // compiles and links for multiple devices simultaneously
158
159 for (int d = 0; d < hardware->getEnabledDevices().size(); d++)
160 {
161 err = clPrograms[d]->build(hardware->getClDevices(d), buildParams.c_str());
162 }
163
164 if (checkErr(err, "program->build()"))
165 {
166 WriteLog("OpenCl kernel program successfully compiled", 2);
167
168 for (int d = 0; d < hardware->getEnabledDevices().size(); d++)
169 {
170 std::vector<size_t> sizes;
171 err = clPrograms[d]->getInfo(CL_PROGRAM_BINARY_SIZES, &sizes);
172 WriteLogInt("Program size", sizes[d], 2);
173 }
174 return true;
175 }
176 else
177 {
178 std::stringstream errorMessageStream;
179 for (int d = 0; d < hardware->getEnabledDevices().size(); d++)
180 {
181 errorMessageStream << "Device #" << d << "\nOpenCL Build log:\n"
182 << clPrograms[d]->getBuildInfo<CL_PROGRAM_BUILD_LOG>(
183 *hardware->getEnabledDevices().at(d))
184 << std::endl;
185 }
186 std::string buildLogText = errorMessageStream.str();
187
188 *errorText = QString::fromStdString(errorMessageStream.str());
189
190 std::cerr << buildLogText;
191
192 if (!quiet)
193 {
194 emit showErrorMessage(
195 QObject::tr("Error during compilation of OpenCL program\n") + errorText->left(500),
196 cErrorMessage::errorMessage, nullptr);
197 }
198
199 lastBuildParametersHash.clear();
200 lastProgramHash.clear();
201
202 return false;
203 }
204 }
205 else
206 {
207 if (!quiet)
208 {
209 emit showErrorMessage(
210 QObject::tr("OpenCL %1 cannot be created!").arg(QObject::tr("program")),
211 cErrorMessage::errorMessage, nullptr);
212 }
213 return false;
214 }
215 }
216 else
217 {
218 WriteLog("Re-compile is not needed", 2);
219 return true;
220 }
221 }
222 else
223 {
224 emit showErrorMessage(QObject::tr("No devices to use for OpenCL! Check program preferences."),
225 cErrorMessage::errorMessage, nullptr);
226 return false;
227 }
228 }
229
CreateKernel4Program(std::shared_ptr<const cParameterContainer> params)230 bool cOpenClEngine::CreateKernel4Program(std::shared_ptr<const cParameterContainer> params)
231 {
232 if (programsLoaded)
233 {
234 optimalJob.jobSizeMultiplier = quint64(params->Get<int>("opencl_job_size_multiplier"));
235
236 if (CreateKernels())
237 {
238 InitOptimalJob(params);
239 return true;
240 }
241 }
242 return false;
243 }
244
CreateKernels()245 bool cOpenClEngine::CreateKernels()
246 {
247 cl_int err;
248 clKernels.clear();
249 bool wasNoError = true;
250
251 for (int d = 0; d < hardware->getEnabledDevices().size(); d++)
252 {
253 clKernels.append(std::shared_ptr<cl::Kernel>(
254 new cl::Kernel(*clPrograms[d].get(), GetKernelName().toLatin1().constData(), &err)));
255
256 if (!checkErr(err, QString("Device #%1: cl::Kernel()").arg(d))) wasNoError = false;
257 }
258
259 if (wasNoError)
260 {
261 size_t workGroupSize = 0;
262
263 for (int d = 0; d < hardware->getEnabledDevices().size(); d++)
264 {
265 clKernels[d]->getWorkGroupInfo(
266 *hardware->getEnabledDevices().at(d), CL_KERNEL_WORK_GROUP_SIZE, &workGroupSize);
267
268 WriteLogInt("Get info for device", d, 2);
269 WriteLogSizeT("CL_KERNEL_WORK_GROUP_SIZE", workGroupSize, 2);
270
271 size_t workGroupSizeOptimalMultiplier = 0;
272
273 clKernels[d]->getWorkGroupInfo(*hardware->getEnabledDevices().at(d),
274 CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, &workGroupSizeOptimalMultiplier);
275 WriteLogSizeT(
276 "CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE", workGroupSizeOptimalMultiplier, 2);
277
278 if (d == 0)
279 {
280 optimalJob.workGroupSize = workGroupSize;
281 optimalJob.workGroupSizeOptimalMultiplier =
282 workGroupSizeOptimalMultiplier * optimalJob.jobSizeMultiplier;
283 }
284 else
285 {
286 optimalJob.workGroupSize = qMin(quint64(workGroupSize), optimalJob.workGroupSize);
287 optimalJob.workGroupSizeOptimalMultiplier =
288 qMax(workGroupSizeOptimalMultiplier * optimalJob.jobSizeMultiplier,
289 optimalJob.workGroupSizeOptimalMultiplier);
290 ;
291 }
292 }
293
294 kernelCreated = true;
295 return true;
296 }
297 else
298 {
299 emit showErrorMessage(QObject::tr("OpenCL %1 cannot be created!").arg(QObject::tr("kernel")),
300 cErrorMessage::errorMessage, nullptr);
301 kernelCreated = false;
302 }
303 return false;
304 }
305
InitOptimalJob(std::shared_ptr<const cParameterContainer> params)306 void cOpenClEngine::InitOptimalJob(std::shared_ptr<const cParameterContainer> params)
307 {
308 quint64 width = params->Get<int>("image_width");
309 quint64 height = params->Get<int>("image_height");
310 quint64 memoryLimitByUser = params->Get<int>("opencl_memory_limit") * 1024UL * 1024UL;
311 quint64 pixelCnt = width * height;
312
313 cOpenClDevice::sDeviceInformation deviceInfo = hardware->getSelectedDevicesInformation().at(0);
314
315 optimalJob.stepSize = optimalJob.workGroupSize * optimalJob.workGroupSizeOptimalMultiplier;
316
317 qint64 exp = qint64(log(sqrt(double(optimalJob.stepSize + 1))) / log(2.0));
318
319 optimalJob.stepSizeX = quint64(pow(2.0, double(exp)));
320 optimalJob.stepSizeY = optimalJob.stepSize / optimalJob.stepSizeX;
321
322 // optimalJob.stepSizeX = 1;
323 // optimalJob.stepSizeY = 1;
324
325 optimalJob.workGroupSizeMultiplier = optimalJob.workGroupSizeOptimalMultiplier;
326 optimalJob.lastProcessingTime = 1.0;
327
328 quint64 maxAllocMemSize = quint64(deviceInfo.maxMemAllocSize);
329 quint64 memSize = memoryLimitByUser;
330 if (maxAllocMemSize > 0 && maxAllocMemSize * 0.75 < memoryLimitByUser)
331 {
332 memSize = quint64(maxAllocMemSize * 0.75);
333 }
334 if (optimalJob.sizeOfPixel != 0)
335 {
336 optimalJob.jobSizeLimit = memSize / optimalJob.sizeOfPixel;
337 }
338 else
339 {
340 optimalJob.jobSizeLimit = pixelCnt;
341 }
342
343 WriteLogSizeT("cOpenClEngine::InitOptimalJob(): stepSize", optimalJob.stepSize, 2);
344 WriteLogSizeT("cOpenClEngine::InitOptimalJob(): stepSizeX", optimalJob.stepSizeX, 2);
345 WriteLogSizeT("cOpenClEngine::InitOptimalJob(): stepSizeY", optimalJob.stepSizeY, 2);
346 }
347
CreateCommandQueue()348 bool cOpenClEngine::CreateCommandQueue()
349 {
350 if (hardware->ContextCreated())
351 {
352 cl_int err;
353 bool wasNoError = true;
354
355 clQueues.clear();
356
357 for (int d = 0; d < hardware->getEnabledDevices().size(); d++)
358 {
359 clQueues.append(std::shared_ptr<cl::CommandQueue>(new cl::CommandQueue(
360 *hardware->getContext(d), *hardware->getEnabledDevices().at(d), 0, &err)));
361
362 if (!checkErr(err, QString("Device #%1: cl::CommandQueue()").arg(d))) wasNoError = false;
363 }
364
365 if (wasNoError)
366 {
367 readyForRendering = true;
368 return true;
369 }
370 else
371 {
372 emit showErrorMessage(
373 QObject::tr("OpenCL %1 cannot be created!").arg(QObject::tr("command queue")),
374 cErrorMessage::errorMessage, nullptr);
375 readyForRendering = false;
376 return false;
377 }
378 }
379 return false;
380 }
381
UpdateOptimalJobStart(quint64 pixelsLeft)382 void cOpenClEngine::UpdateOptimalJobStart(quint64 pixelsLeft)
383 {
384 optimalJob.timer.restart();
385 optimalJob.timer.start();
386 double processingCycleTime = optimalJob.optimalProcessingCycle;
387
388 optimalJob.workGroupSizeMultiplier *= processingCycleTime / optimalJob.lastProcessingTime;
389
390 quint64 maxWorkGroupSizeMultiplier = pixelsLeft / optimalJob.workGroupSize;
391
392 if (optimalJob.workGroupSizeMultiplier > maxWorkGroupSizeMultiplier)
393 optimalJob.workGroupSizeMultiplier = maxWorkGroupSizeMultiplier;
394
395 if (optimalJob.workGroupSizeMultiplier * optimalJob.workGroupSize > optimalJob.jobSizeLimit)
396 optimalJob.workGroupSizeMultiplier = optimalJob.jobSizeLimit / optimalJob.workGroupSize;
397
398 if (optimalJob.workGroupSizeMultiplier < optimalJob.workGroupSizeOptimalMultiplier)
399 optimalJob.workGroupSizeMultiplier = optimalJob.workGroupSizeOptimalMultiplier;
400
401 optimalJob.stepSize = optimalJob.workGroupSizeMultiplier * optimalJob.workGroupSize;
402
403 // qDebug() << "lastProcessingTime" << optimalJob.lastProcessingTime;
404 // qDebug() << "stepSize:" << optimalJob.stepSize;
405 }
406
Reset()407 void cOpenClEngine::Reset()
408 {
409 lastBuildParametersHash.clear();
410 lastProgramHash.clear();
411 definesCollector.clear();
412 }
413
UpdateOptimalJobEnd()414 void cOpenClEngine::UpdateOptimalJobEnd()
415 {
416 optimalJob.lastProcessingTime = optimalJob.timer.nsecsElapsed() / 1e9;
417 }
418
Lock()419 void cOpenClEngine::Lock()
420 {
421 locked = true;
422 lock.lock();
423 }
424
Unlock()425 void cOpenClEngine::Unlock()
426 {
427
428 lock.unlock();
429
430 locked = false;
431 }
432
DeleteKernelCache()433 void cOpenClEngine::DeleteKernelCache()
434 {
435 // Delete NVIDIA driver build cache
436 #ifdef _WIN32
437 QDir dir(QDir::homePath() + "/AppData/Roaming/NVIDIA/ComputeCache/");
438 #else
439 QDir dir(QDir::homePath() + "/.nv/ComputeCache/");
440 #endif
441 if (dir.exists()) dir.removeRecursively();
442 if (!dir.exists()) QDir().mkdir(dir.absolutePath());
443 }
444
PreAllocateBuffers(std::shared_ptr<const cParameterContainer> params)445 bool cOpenClEngine::PreAllocateBuffers(std::shared_ptr<const cParameterContainer> params)
446 {
447 ReleaseMemory();
448 RegisterInputOutputBuffers(params);
449
450 cl_int err;
451
452 if (hardware->ContextCreated())
453 {
454
455 for (int d = 0; d < inputAndOutputBuffers.size(); d++)
456 {
457 for (auto &inputAndOutputBuffer : inputAndOutputBuffers[d])
458 {
459 inputAndOutputBuffer.ptr.reset(
460 new char[inputAndOutputBuffer.size()], sClInputOutputBuffer::Deleter);
461 inputAndOutputBuffer.clPtr.reset(
462 new cl::Buffer(*hardware->getContext(d), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
463 inputAndOutputBuffer.size(), inputAndOutputBuffer.ptr.get(), &err));
464 if (!checkErr(err, "new cl::Buffer(...) for " + inputAndOutputBuffer.name))
465 {
466 emit showErrorMessage(
467 QObject::tr("OpenCL %1 cannot be created!").arg(inputAndOutputBuffer.name),
468 cErrorMessage::errorMessage, nullptr);
469 return false;
470 }
471 }
472 }
473
474 for (int d = 0; d < outputBuffers.size(); d++)
475 {
476 for (auto &outputBuffer : outputBuffers[d])
477 {
478 outputBuffer.ptr.reset(new char[outputBuffer.size()], sClInputOutputBuffer::Deleter);
479 outputBuffer.clPtr.reset(
480 new cl::Buffer(*hardware->getContext(d), CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
481 outputBuffer.size(), outputBuffer.ptr.get(), &err));
482 if (!checkErr(err, "new cl::Buffer(...) for " + outputBuffer.name))
483 {
484 emit showErrorMessage(QObject::tr("OpenCL %1 cannot be created!").arg(outputBuffer.name),
485 cErrorMessage::errorMessage, nullptr);
486 return false;
487 }
488 }
489 }
490
491 for (int d = 0; d < inputBuffers.size(); d++)
492 {
493 for (auto &inputBuffer : inputBuffers[d])
494 {
495 inputBuffer.ptr.reset(new char[inputBuffer.size()], sClInputOutputBuffer::Deleter);
496 inputBuffer.clPtr.reset(new cl::Buffer(*hardware->getContext(d),
497 CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, inputBuffer.size(), inputBuffer.ptr.get(), &err));
498 if (!checkErr(err, "new cl::Buffer(...) for " + inputBuffer.name))
499 {
500 emit showErrorMessage(QObject::tr("OpenCL %1 cannot be created!").arg(inputBuffer.name),
501 cErrorMessage::errorMessage, nullptr);
502 return false;
503 }
504 }
505 }
506 }
507 else
508 {
509 emit showErrorMessage(
510 QObject::tr("OpenCL context is not ready"), cErrorMessage::errorMessage, nullptr);
511 return false;
512 }
513
514 return true;
515 }
516
ReleaseMemory()517 void cOpenClEngine::ReleaseMemory()
518 {
519 for (auto &i : outputBuffers)
520 {
521 for (auto &outputBuffer : i)
522 {
523 outputBuffer.ptr.reset();
524 outputBuffer.clPtr.reset();
525 }
526 i.clear();
527 }
528
529 for (auto &i : inputBuffers)
530 {
531 for (auto &inputBuffer : i)
532 {
533 inputBuffer.ptr.reset();
534 inputBuffer.clPtr.reset();
535 }
536 i.clear();
537 }
538
539 for (auto &i : inputAndOutputBuffers)
540 {
541 for (auto &inputAndOutputBuffer : i)
542 {
543 inputAndOutputBuffer.ptr.reset();
544 inputAndOutputBuffer.clPtr.reset();
545 }
546 i.clear();
547 }
548 }
549
WriteBuffersToQueue()550 bool cOpenClEngine::WriteBuffersToQueue()
551 {
552 for (int d = 0; d < inputBuffers.size(); d++)
553 {
554 for (auto &inputBuffer : inputBuffers[d])
555 {
556 cl_int err = clQueues[d]->enqueueWriteBuffer(
557 *inputBuffer.clPtr, CL_TRUE, 0, inputBuffer.size(), inputBuffer.ptr.get());
558 if (!checkErr(err, "CommandQueue::enqueueWriteBuffer(...) for " + inputBuffer.name))
559 {
560 emit showErrorMessage(QObject::tr("Cannot enqueue writing OpenCL %1").arg(inputBuffer.name),
561 cErrorMessage::errorMessage, nullptr);
562 return false;
563 }
564 }
565
566 int err = clQueues[d]->finish();
567 if (!checkErr(err, "CommandQueue::finish() - write buffers"))
568 {
569 emit showErrorMessage(
570 QObject::tr("Cannot finish writing OpenCL buffers"), cErrorMessage::errorMessage, nullptr);
571 return false;
572 }
573 }
574
575 for (int d = 0; d < inputAndOutputBuffers.size(); d++)
576 {
577 for (auto &inputAndOutputBuffer : inputAndOutputBuffers[d])
578 {
579 cl_int err = clQueues[d]->enqueueWriteBuffer(*inputAndOutputBuffer.clPtr, CL_TRUE, 0,
580 inputAndOutputBuffer.size(), inputAndOutputBuffer.ptr.get());
581 if (!checkErr(err, "CommandQueue::enqueueWriteBuffer(...) for " + inputAndOutputBuffer.name))
582 {
583 emit showErrorMessage(
584 QObject::tr("Cannot enqueue writing OpenCL %1").arg(inputAndOutputBuffer.name),
585 cErrorMessage::errorMessage, nullptr);
586 return false;
587 }
588 }
589 int err = clQueues[d]->finish();
590 if (!checkErr(err, "CommandQueue::finish() - write buffers"))
591 {
592 emit showErrorMessage(
593 QObject::tr("Cannot finish writing OpenCL buffers"), cErrorMessage::errorMessage, nullptr);
594 return false;
595 }
596 }
597 return true;
598 }
599
ReadBuffersFromQueue(int deviceIndex)600 bool cOpenClEngine::ReadBuffersFromQueue(int deviceIndex)
601 {
602 cl_int err = 0;
603 for (auto &outputBuffer : outputBuffers[deviceIndex])
604 {
605 err = clQueues[deviceIndex]->enqueueReadBuffer(
606 *outputBuffer.clPtr, CL_FALSE, 0, outputBuffer.size(), outputBuffer.ptr.get());
607 if (!checkErr(err, "CommandQueue::enqueueReadBuffer() for " + outputBuffer.name))
608 {
609 emit showErrorMessage(
610 QObject::tr("Cannot enqueue reading OpenCL buffers %1").arg(outputBuffer.name),
611 cErrorMessage::errorMessage, nullptr);
612 return false;
613 }
614 }
615
616 if (deviceIndex < inputAndOutputBuffers.size())
617 {
618 for (auto &inputAndOutputBuffer : inputAndOutputBuffers[deviceIndex])
619 {
620 err = clQueues[deviceIndex]->enqueueReadBuffer(*inputAndOutputBuffer.clPtr, CL_FALSE, 0,
621 inputAndOutputBuffer.size(), inputAndOutputBuffer.ptr.get());
622 if (!checkErr(err, "CommandQueue::enqueueReadBuffer() for " + inputAndOutputBuffer.name))
623 {
624 emit showErrorMessage(
625 QObject::tr("Cannot enqueue reading OpenCL buffers %1. \nCalculation probably took too "
626 "long and triggered timeout error in graphics driver.")
627 .arg(inputAndOutputBuffer.name),
628 cErrorMessage::errorMessage, nullptr);
629 return false;
630 }
631 }
632 }
633
634 err = clQueues[deviceIndex]->finish();
635 if (!checkErr(err, "CommandQueue::finish() - read buffers"))
636 {
637 emit showErrorMessage(
638 QObject::tr("Cannot finish reading OpenCL output buffers\nCalculation probably took too "
639 "long and triggered timeout error in graphics driver."),
640 cErrorMessage::errorMessage, nullptr);
641 return false;
642 }
643
644 return true;
645 }
646
AssignParametersToKernel(int deviceIndex)647 bool cOpenClEngine::AssignParametersToKernel(int deviceIndex)
648 {
649 uint argIterator = 0;
650 if (deviceIndex < inputBuffers.size())
651 {
652 for (auto &inputBuffer : inputBuffers[deviceIndex])
653 {
654 int err = clKernels[deviceIndex]->setArg(argIterator++, *inputBuffer.clPtr);
655 if (!checkErr(
656 err, "kernel->setArg(" + QString::number(argIterator) + ") for " + inputBuffer.name))
657 {
658 emit showErrorMessage(
659 QObject::tr("Cannot set OpenCL argument for %1").arg(inputBuffer.name),
660 cErrorMessage::errorMessage, nullptr);
661 return false;
662 }
663 }
664 }
665 for (auto &outputBuffer : outputBuffers[deviceIndex])
666 {
667 int err = clKernels[deviceIndex]->setArg(argIterator++, *outputBuffer.clPtr);
668 if (!checkErr(
669 err, "kernel->setArg(" + QString::number(argIterator) + ") for " + outputBuffer.name))
670 {
671 emit showErrorMessage(QObject::tr("Cannot set OpenCL argument for %1").arg(outputBuffer.name),
672 cErrorMessage::errorMessage, nullptr);
673 return false;
674 }
675 }
676 if (deviceIndex < inputAndOutputBuffers.size())
677 {
678 for (auto &inputAndOutputBuffer : inputAndOutputBuffers[deviceIndex])
679 {
680 int err = clKernels[deviceIndex]->setArg(argIterator++, *inputAndOutputBuffer.clPtr);
681 if (!checkErr(err, "kernel->setArg(" + QString::number(argIterator) + ") for "
682 + inputAndOutputBuffer.name))
683 {
684 emit showErrorMessage(
685 QObject::tr("Cannot set OpenCL argument for %1").arg(inputAndOutputBuffer.name),
686 cErrorMessage::errorMessage, nullptr);
687 return false;
688 }
689 }
690 }
691 return AssignParametersToKernelAdditional(argIterator, deviceIndex);
692 }
693
AddInclude(QByteArray & program,const QString & filePath)694 void cOpenClEngine::AddInclude(QByteArray &program, const QString &filePath)
695 {
696 program.append(QString("\n#include \"" + filePath + "\"\n").toUtf8());
697 }
698
699 #endif
700