1 /**
2  * Mandelbulber v2, a 3D fractal generator       ,=#MKNmMMKmmßMNWy,
3  *                                             ,B" ]L,,p%%%,,,§;, "K
4  * Copyright (C) 2017-20 Mandelbulber Team     §R-==%w["'~5]m%=L.=~5N
5  *                                        ,=mm=§M ]=4 yJKA"/-Nsaj  "Bw,==,,
6  * This file is part of Mandelbulber.    §R.r= jw",M  Km .mM  FW ",§=ß., ,TN
7  *                                     ,4R =%["w[N=7]J '"5=],""]]M,w,-; T=]M
8  * Mandelbulber is free software:     §R.ß~-Q/M=,=5"v"]=Qf,'§"M= =,M.§ Rz]M"Kw
9  * you can redistribute it and/or     §w "xDY.J ' -"m=====WeC=\ ""%""y=%"]"" §
10  * modify it under the terms of the    "§M=M =D=4"N #"%==A%p M§ M6  R' #"=~.4M
11  * GNU General Public License as        §W =, ][T"]C  §  § '§ e===~ U  !§[Z ]N
12  * published by the                    4M",,Jm=,"=e~  §  §  j]]""N  BmM"py=ßM
13  * Free Software Foundation,          ]§ T,M=& 'YmMMpM9MMM%=w=,,=MT]M m§;'§,
14  * either version 3 of the License,    TWw [.j"5=~N[=§%=%W,T ]R,"=="Y[LFT ]N
15  * or (at your option)                   TW=,-#"%=;[  =Q:["V""  ],,M.m == ]N
16  * any later version.                      J§"mr"] ,=,," =="""J]= M"M"]==ß"
17  *                                          §= "=C=4 §"eM "=B:m|4"]#F,§~
18  * Mandelbulber is distributed in            "9w=,,]w em%wJ '"~" ,=,,ß"
19  * the hope that it will be useful,                 . "K=  ,=RMMMßM"""
20  * but WITHOUT ANY WARRANTY;                            .'''
21  * without even the implied warranty
22  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
23  *
24  * See the GNU General Public License for more details.
25  * You should have received a copy of the GNU General Public License
26  * along with Mandelbulber. If not, see <http://www.gnu.org/licenses/>.
27  *
28  * ###########################################################################
29  *
30  * Authors: Krzysztof Marczak (buddhi1980@gmail.com), Robert Pancoast (RobertPancoast77@gmail.com)
31  *
32  *  Created on: 3 maj 2017
33  *      Author: krzysztof
34  */
35 
36 #include "opencl_engine.h"
37 
38 #include <iostream>
39 #include <memory>
40 #include <sstream>
41 
42 #include <QCryptographicHash>
43 #include <QDebug>
44 #include <QDir>
45 #include <QElapsedTimer>
46 
47 #include "error_message.hpp"
48 #include "opencl_hardware.h"
49 #include "parameters.hpp"
50 #include "write_log.hpp"
51 
cOpenClEngine(cOpenClHardware * _hardware)52 cOpenClEngine::cOpenClEngine(cOpenClHardware *_hardware) : QObject(_hardware), hardware(_hardware)
53 {
54 #ifdef USE_OPENCL
55 	programsLoaded = false;
56 	readyForRendering = false;
57 	kernelCreated = false;
58 	locked = false;
59 	useBuildCache = true;
60 	useFastRelaxedMath = false;
61 
62 	clKernels.append(std::shared_ptr<cl::Kernel>());
63 	clQueues.append(std::shared_ptr<cl::CommandQueue>());
64 	outputBuffers.append(listOfBuffers());
65 	inputAndOutputBuffers.append(listOfBuffers());
66 	inputBuffers.append(listOfBuffers());
67 
68 #endif
69 
70 	connect(this, SIGNAL(showErrorMessage(QString, cErrorMessage::enumMessageType, QWidget *)),
71 		gErrorMessage, SLOT(slotShowMessage(QString, cErrorMessage::enumMessageType, QWidget *)));
72 }
73 
~cOpenClEngine()74 cOpenClEngine::~cOpenClEngine()
75 {
76 #ifdef USE_OPENCL
77 #endif
78 }
79 
80 #ifdef USE_OPENCL
81 
checkErr(cl_int err,QString functionName)82 bool cOpenClEngine::checkErr(cl_int err, QString functionName)
83 {
84 	if (err != CL_SUCCESS)
85 	{
86 		qCritical() << "OpenCl ERROR: " << functionName << " (" << err << ")";
87 		return false;
88 	}
89 	else
90 		return true;
91 }
92 
Build(const QByteArray & programString,QString * errorText,bool quiet)93 bool cOpenClEngine::Build(const QByteArray &programString, QString *errorText, bool quiet)
94 {
95 	if (hardware->getClDevices(0).size() > 0 && hardware->getEnabledDevices().size() > 0)
96 	{
97 		// calculating hash code of the program
98 		QCryptographicHash hashCryptProgram(QCryptographicHash::Md4);
99 		hashCryptProgram.addData(programString);
100 		// recompile also if selected devices changed
101 		for (int d = 0; d < hardware->getEnabledDevices().size(); d++)
102 		{
103 			hashCryptProgram.addData(
104 				reinterpret_cast<char *>(&hardware->getSelectedDevicesIndices()[d]), sizeof(int));
105 		}
106 		QByteArray hashProgram = hashCryptProgram.result();
107 
108 		// calculating hash code of build parameters
109 		QCryptographicHash hashCryptBuildParams(QCryptographicHash::Md4);
110 		hashCryptBuildParams.addData(definesCollector.toLocal8Bit());
111 		QByteArray hashBuildParams = hashCryptBuildParams.result();
112 
113 		definesCollector += " -DCODEHASH=" + QString(hashProgram.toHex());
114 
115 		if (!useBuildCache) DeleteKernelCache();
116 
117 		// if program is different than in previous run
118 		if (!(hashProgram == lastProgramHash && hashBuildParams == lastBuildParametersHash
119 					&& useBuildCache))
120 		{
121 			lastBuildParametersHash = hashBuildParams;
122 			lastProgramHash = hashProgram;
123 
124 			// collecting all parts of program
125 			cl::Program::Sources sources;
126 			sources.emplace_back(programString.constData(), size_t(programString.length()));
127 
128 			// creating cl::Program
129 			cl_int err = 0;
130 
131 			// Creates a program from source strings and Context.
132 			// Context initialized with support for multiple devices.
133 			// Therefore cl::Program initialized with device vector
134 			// Does not compile or link the program.
135 
136 			clPrograms.clear();
137 			for (int d = 0; d < hardware->getEnabledDevices().size(); d++)
138 			{
139 				clPrograms.append(
140 					std::shared_ptr<cl::Program>(new cl::Program(*hardware->getContext(d), sources, &err)));
141 			}
142 
143 			if (checkErr(err, "cl::Program()"))
144 			{
145 				std::string buildParams =
146 					"-w -cl-single-precision-constant -cl-denorms-are-zero -cl-mad-enable";
147 
148 				if (useFastRelaxedMath) buildParams += " -cl-fast-relaxed-math";
149 
150 				buildParams.append(" -DOPENCL_KERNEL_CODE");
151 
152 				buildParams += definesCollector.toUtf8().constData();
153 
154 				WriteLogString("Build parameters", buildParams.c_str(), 2);
155 
156 				// cl::Program::Build (compiles and links) a multi-device program executable
157 				// compiles and links for multiple devices simultaneously
158 
159 				for (int d = 0; d < hardware->getEnabledDevices().size(); d++)
160 				{
161 					err = clPrograms[d]->build(hardware->getClDevices(d), buildParams.c_str());
162 				}
163 
164 				if (checkErr(err, "program->build()"))
165 				{
166 					WriteLog("OpenCl kernel program successfully compiled", 2);
167 
168 					for (int d = 0; d < hardware->getEnabledDevices().size(); d++)
169 					{
170 						std::vector<size_t> sizes;
171 						err = clPrograms[d]->getInfo(CL_PROGRAM_BINARY_SIZES, &sizes);
172 						WriteLogInt("Program size", sizes[d], 2);
173 					}
174 					return true;
175 				}
176 				else
177 				{
178 					std::stringstream errorMessageStream;
179 					for (int d = 0; d < hardware->getEnabledDevices().size(); d++)
180 					{
181 						errorMessageStream << "Device #" << d << "\nOpenCL Build log:\n"
182 															 << clPrograms[d]->getBuildInfo<CL_PROGRAM_BUILD_LOG>(
183 																		*hardware->getEnabledDevices().at(d))
184 															 << std::endl;
185 					}
186 					std::string buildLogText = errorMessageStream.str();
187 
188 					*errorText = QString::fromStdString(errorMessageStream.str());
189 
190 					std::cerr << buildLogText;
191 
192 					if (!quiet)
193 					{
194 						emit showErrorMessage(
195 							QObject::tr("Error during compilation of OpenCL program\n") + errorText->left(500),
196 							cErrorMessage::errorMessage, nullptr);
197 					}
198 
199 					lastBuildParametersHash.clear();
200 					lastProgramHash.clear();
201 
202 					return false;
203 				}
204 			}
205 			else
206 			{
207 				if (!quiet)
208 				{
209 					emit showErrorMessage(
210 						QObject::tr("OpenCL %1 cannot be created!").arg(QObject::tr("program")),
211 						cErrorMessage::errorMessage, nullptr);
212 				}
213 				return false;
214 			}
215 		}
216 		else
217 		{
218 			WriteLog("Re-compile is not needed", 2);
219 			return true;
220 		}
221 	}
222 	else
223 	{
224 		emit showErrorMessage(QObject::tr("No devices to use for OpenCL! Check program preferences."),
225 			cErrorMessage::errorMessage, nullptr);
226 		return false;
227 	}
228 }
229 
CreateKernel4Program(std::shared_ptr<const cParameterContainer> params)230 bool cOpenClEngine::CreateKernel4Program(std::shared_ptr<const cParameterContainer> params)
231 {
232 	if (programsLoaded)
233 	{
234 		optimalJob.jobSizeMultiplier = quint64(params->Get<int>("opencl_job_size_multiplier"));
235 
236 		if (CreateKernels())
237 		{
238 			InitOptimalJob(params);
239 			return true;
240 		}
241 	}
242 	return false;
243 }
244 
CreateKernels()245 bool cOpenClEngine::CreateKernels()
246 {
247 	cl_int err;
248 	clKernels.clear();
249 	bool wasNoError = true;
250 
251 	for (int d = 0; d < hardware->getEnabledDevices().size(); d++)
252 	{
253 		clKernels.append(std::shared_ptr<cl::Kernel>(
254 			new cl::Kernel(*clPrograms[d].get(), GetKernelName().toLatin1().constData(), &err)));
255 
256 		if (!checkErr(err, QString("Device #%1: cl::Kernel()").arg(d))) wasNoError = false;
257 	}
258 
259 	if (wasNoError)
260 	{
261 		size_t workGroupSize = 0;
262 
263 		for (int d = 0; d < hardware->getEnabledDevices().size(); d++)
264 		{
265 			clKernels[d]->getWorkGroupInfo(
266 				*hardware->getEnabledDevices().at(d), CL_KERNEL_WORK_GROUP_SIZE, &workGroupSize);
267 
268 			WriteLogInt("Get info for device", d, 2);
269 			WriteLogSizeT("CL_KERNEL_WORK_GROUP_SIZE", workGroupSize, 2);
270 
271 			size_t workGroupSizeOptimalMultiplier = 0;
272 
273 			clKernels[d]->getWorkGroupInfo(*hardware->getEnabledDevices().at(d),
274 				CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, &workGroupSizeOptimalMultiplier);
275 			WriteLogSizeT(
276 				"CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE", workGroupSizeOptimalMultiplier, 2);
277 
278 			if (d == 0)
279 			{
280 				optimalJob.workGroupSize = workGroupSize;
281 				optimalJob.workGroupSizeOptimalMultiplier =
282 					workGroupSizeOptimalMultiplier * optimalJob.jobSizeMultiplier;
283 			}
284 			else
285 			{
286 				optimalJob.workGroupSize = qMin(quint64(workGroupSize), optimalJob.workGroupSize);
287 				optimalJob.workGroupSizeOptimalMultiplier =
288 					qMax(workGroupSizeOptimalMultiplier * optimalJob.jobSizeMultiplier,
289 						optimalJob.workGroupSizeOptimalMultiplier);
290 				;
291 			}
292 		}
293 
294 		kernelCreated = true;
295 		return true;
296 	}
297 	else
298 	{
299 		emit showErrorMessage(QObject::tr("OpenCL %1 cannot be created!").arg(QObject::tr("kernel")),
300 			cErrorMessage::errorMessage, nullptr);
301 		kernelCreated = false;
302 	}
303 	return false;
304 }
305 
InitOptimalJob(std::shared_ptr<const cParameterContainer> params)306 void cOpenClEngine::InitOptimalJob(std::shared_ptr<const cParameterContainer> params)
307 {
308 	quint64 width = params->Get<int>("image_width");
309 	quint64 height = params->Get<int>("image_height");
310 	quint64 memoryLimitByUser = params->Get<int>("opencl_memory_limit") * 1024UL * 1024UL;
311 	quint64 pixelCnt = width * height;
312 
313 	cOpenClDevice::sDeviceInformation deviceInfo = hardware->getSelectedDevicesInformation().at(0);
314 
315 	optimalJob.stepSize = optimalJob.workGroupSize * optimalJob.workGroupSizeOptimalMultiplier;
316 
317 	qint64 exp = qint64(log(sqrt(double(optimalJob.stepSize + 1))) / log(2.0));
318 
319 	optimalJob.stepSizeX = quint64(pow(2.0, double(exp)));
320 	optimalJob.stepSizeY = optimalJob.stepSize / optimalJob.stepSizeX;
321 
322 	//	optimalJob.stepSizeX = 1;
323 	//	optimalJob.stepSizeY = 1;
324 
325 	optimalJob.workGroupSizeMultiplier = optimalJob.workGroupSizeOptimalMultiplier;
326 	optimalJob.lastProcessingTime = 1.0;
327 
328 	quint64 maxAllocMemSize = quint64(deviceInfo.maxMemAllocSize);
329 	quint64 memSize = memoryLimitByUser;
330 	if (maxAllocMemSize > 0 && maxAllocMemSize * 0.75 < memoryLimitByUser)
331 	{
332 		memSize = quint64(maxAllocMemSize * 0.75);
333 	}
334 	if (optimalJob.sizeOfPixel != 0)
335 	{
336 		optimalJob.jobSizeLimit = memSize / optimalJob.sizeOfPixel;
337 	}
338 	else
339 	{
340 		optimalJob.jobSizeLimit = pixelCnt;
341 	}
342 
343 	WriteLogSizeT("cOpenClEngine::InitOptimalJob(): stepSize", optimalJob.stepSize, 2);
344 	WriteLogSizeT("cOpenClEngine::InitOptimalJob(): stepSizeX", optimalJob.stepSizeX, 2);
345 	WriteLogSizeT("cOpenClEngine::InitOptimalJob(): stepSizeY", optimalJob.stepSizeY, 2);
346 }
347 
CreateCommandQueue()348 bool cOpenClEngine::CreateCommandQueue()
349 {
350 	if (hardware->ContextCreated())
351 	{
352 		cl_int err;
353 		bool wasNoError = true;
354 
355 		clQueues.clear();
356 
357 		for (int d = 0; d < hardware->getEnabledDevices().size(); d++)
358 		{
359 			clQueues.append(std::shared_ptr<cl::CommandQueue>(new cl::CommandQueue(
360 				*hardware->getContext(d), *hardware->getEnabledDevices().at(d), 0, &err)));
361 
362 			if (!checkErr(err, QString("Device #%1: cl::CommandQueue()").arg(d))) wasNoError = false;
363 		}
364 
365 		if (wasNoError)
366 		{
367 			readyForRendering = true;
368 			return true;
369 		}
370 		else
371 		{
372 			emit showErrorMessage(
373 				QObject::tr("OpenCL %1 cannot be created!").arg(QObject::tr("command queue")),
374 				cErrorMessage::errorMessage, nullptr);
375 			readyForRendering = false;
376 			return false;
377 		}
378 	}
379 	return false;
380 }
381 
UpdateOptimalJobStart(quint64 pixelsLeft)382 void cOpenClEngine::UpdateOptimalJobStart(quint64 pixelsLeft)
383 {
384 	optimalJob.timer.restart();
385 	optimalJob.timer.start();
386 	double processingCycleTime = optimalJob.optimalProcessingCycle;
387 
388 	optimalJob.workGroupSizeMultiplier *= processingCycleTime / optimalJob.lastProcessingTime;
389 
390 	quint64 maxWorkGroupSizeMultiplier = pixelsLeft / optimalJob.workGroupSize;
391 
392 	if (optimalJob.workGroupSizeMultiplier > maxWorkGroupSizeMultiplier)
393 		optimalJob.workGroupSizeMultiplier = maxWorkGroupSizeMultiplier;
394 
395 	if (optimalJob.workGroupSizeMultiplier * optimalJob.workGroupSize > optimalJob.jobSizeLimit)
396 		optimalJob.workGroupSizeMultiplier = optimalJob.jobSizeLimit / optimalJob.workGroupSize;
397 
398 	if (optimalJob.workGroupSizeMultiplier < optimalJob.workGroupSizeOptimalMultiplier)
399 		optimalJob.workGroupSizeMultiplier = optimalJob.workGroupSizeOptimalMultiplier;
400 
401 	optimalJob.stepSize = optimalJob.workGroupSizeMultiplier * optimalJob.workGroupSize;
402 
403 	//	qDebug() << "lastProcessingTime" << optimalJob.lastProcessingTime;
404 	//	qDebug() << "stepSize:" << optimalJob.stepSize;
405 }
406 
Reset()407 void cOpenClEngine::Reset()
408 {
409 	lastBuildParametersHash.clear();
410 	lastProgramHash.clear();
411 	definesCollector.clear();
412 }
413 
UpdateOptimalJobEnd()414 void cOpenClEngine::UpdateOptimalJobEnd()
415 {
416 	optimalJob.lastProcessingTime = optimalJob.timer.nsecsElapsed() / 1e9;
417 }
418 
Lock()419 void cOpenClEngine::Lock()
420 {
421 	locked = true;
422 	lock.lock();
423 }
424 
Unlock()425 void cOpenClEngine::Unlock()
426 {
427 
428 	lock.unlock();
429 
430 	locked = false;
431 }
432 
DeleteKernelCache()433 void cOpenClEngine::DeleteKernelCache()
434 {
435 // Delete NVIDIA driver build cache
436 #ifdef _WIN32
437 	QDir dir(QDir::homePath() + "/AppData/Roaming/NVIDIA/ComputeCache/");
438 #else
439 	QDir dir(QDir::homePath() + "/.nv/ComputeCache/");
440 #endif
441 	if (dir.exists()) dir.removeRecursively();
442 	if (!dir.exists()) QDir().mkdir(dir.absolutePath());
443 }
444 
PreAllocateBuffers(std::shared_ptr<const cParameterContainer> params)445 bool cOpenClEngine::PreAllocateBuffers(std::shared_ptr<const cParameterContainer> params)
446 {
447 	ReleaseMemory();
448 	RegisterInputOutputBuffers(params);
449 
450 	cl_int err;
451 
452 	if (hardware->ContextCreated())
453 	{
454 
455 		for (int d = 0; d < inputAndOutputBuffers.size(); d++)
456 		{
457 			for (auto &inputAndOutputBuffer : inputAndOutputBuffers[d])
458 			{
459 				inputAndOutputBuffer.ptr.reset(
460 					new char[inputAndOutputBuffer.size()], sClInputOutputBuffer::Deleter);
461 				inputAndOutputBuffer.clPtr.reset(
462 					new cl::Buffer(*hardware->getContext(d), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
463 						inputAndOutputBuffer.size(), inputAndOutputBuffer.ptr.get(), &err));
464 				if (!checkErr(err, "new cl::Buffer(...) for " + inputAndOutputBuffer.name))
465 				{
466 					emit showErrorMessage(
467 						QObject::tr("OpenCL %1 cannot be created!").arg(inputAndOutputBuffer.name),
468 						cErrorMessage::errorMessage, nullptr);
469 					return false;
470 				}
471 			}
472 		}
473 
474 		for (int d = 0; d < outputBuffers.size(); d++)
475 		{
476 			for (auto &outputBuffer : outputBuffers[d])
477 			{
478 				outputBuffer.ptr.reset(new char[outputBuffer.size()], sClInputOutputBuffer::Deleter);
479 				outputBuffer.clPtr.reset(
480 					new cl::Buffer(*hardware->getContext(d), CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
481 						outputBuffer.size(), outputBuffer.ptr.get(), &err));
482 				if (!checkErr(err, "new cl::Buffer(...) for " + outputBuffer.name))
483 				{
484 					emit showErrorMessage(QObject::tr("OpenCL %1 cannot be created!").arg(outputBuffer.name),
485 						cErrorMessage::errorMessage, nullptr);
486 					return false;
487 				}
488 			}
489 		}
490 
491 		for (int d = 0; d < inputBuffers.size(); d++)
492 		{
493 			for (auto &inputBuffer : inputBuffers[d])
494 			{
495 				inputBuffer.ptr.reset(new char[inputBuffer.size()], sClInputOutputBuffer::Deleter);
496 				inputBuffer.clPtr.reset(new cl::Buffer(*hardware->getContext(d),
497 					CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, inputBuffer.size(), inputBuffer.ptr.get(), &err));
498 				if (!checkErr(err, "new cl::Buffer(...) for " + inputBuffer.name))
499 				{
500 					emit showErrorMessage(QObject::tr("OpenCL %1 cannot be created!").arg(inputBuffer.name),
501 						cErrorMessage::errorMessage, nullptr);
502 					return false;
503 				}
504 			}
505 		}
506 	}
507 	else
508 	{
509 		emit showErrorMessage(
510 			QObject::tr("OpenCL context is not ready"), cErrorMessage::errorMessage, nullptr);
511 		return false;
512 	}
513 
514 	return true;
515 }
516 
ReleaseMemory()517 void cOpenClEngine::ReleaseMemory()
518 {
519 	for (auto &i : outputBuffers)
520 	{
521 		for (auto &outputBuffer : i)
522 		{
523 			outputBuffer.ptr.reset();
524 			outputBuffer.clPtr.reset();
525 		}
526 		i.clear();
527 	}
528 
529 	for (auto &i : inputBuffers)
530 	{
531 		for (auto &inputBuffer : i)
532 		{
533 			inputBuffer.ptr.reset();
534 			inputBuffer.clPtr.reset();
535 		}
536 		i.clear();
537 	}
538 
539 	for (auto &i : inputAndOutputBuffers)
540 	{
541 		for (auto &inputAndOutputBuffer : i)
542 		{
543 			inputAndOutputBuffer.ptr.reset();
544 			inputAndOutputBuffer.clPtr.reset();
545 		}
546 		i.clear();
547 	}
548 }
549 
WriteBuffersToQueue()550 bool cOpenClEngine::WriteBuffersToQueue()
551 {
552 	for (int d = 0; d < inputBuffers.size(); d++)
553 	{
554 		for (auto &inputBuffer : inputBuffers[d])
555 		{
556 			cl_int err = clQueues[d]->enqueueWriteBuffer(
557 				*inputBuffer.clPtr, CL_TRUE, 0, inputBuffer.size(), inputBuffer.ptr.get());
558 			if (!checkErr(err, "CommandQueue::enqueueWriteBuffer(...) for " + inputBuffer.name))
559 			{
560 				emit showErrorMessage(QObject::tr("Cannot enqueue writing OpenCL %1").arg(inputBuffer.name),
561 					cErrorMessage::errorMessage, nullptr);
562 				return false;
563 			}
564 		}
565 
566 		int err = clQueues[d]->finish();
567 		if (!checkErr(err, "CommandQueue::finish() - write buffers"))
568 		{
569 			emit showErrorMessage(
570 				QObject::tr("Cannot finish writing OpenCL buffers"), cErrorMessage::errorMessage, nullptr);
571 			return false;
572 		}
573 	}
574 
575 	for (int d = 0; d < inputAndOutputBuffers.size(); d++)
576 	{
577 		for (auto &inputAndOutputBuffer : inputAndOutputBuffers[d])
578 		{
579 			cl_int err = clQueues[d]->enqueueWriteBuffer(*inputAndOutputBuffer.clPtr, CL_TRUE, 0,
580 				inputAndOutputBuffer.size(), inputAndOutputBuffer.ptr.get());
581 			if (!checkErr(err, "CommandQueue::enqueueWriteBuffer(...) for " + inputAndOutputBuffer.name))
582 			{
583 				emit showErrorMessage(
584 					QObject::tr("Cannot enqueue writing OpenCL %1").arg(inputAndOutputBuffer.name),
585 					cErrorMessage::errorMessage, nullptr);
586 				return false;
587 			}
588 		}
589 		int err = clQueues[d]->finish();
590 		if (!checkErr(err, "CommandQueue::finish() - write buffers"))
591 		{
592 			emit showErrorMessage(
593 				QObject::tr("Cannot finish writing OpenCL buffers"), cErrorMessage::errorMessage, nullptr);
594 			return false;
595 		}
596 	}
597 	return true;
598 }
599 
ReadBuffersFromQueue(int deviceIndex)600 bool cOpenClEngine::ReadBuffersFromQueue(int deviceIndex)
601 {
602 	cl_int err = 0;
603 	for (auto &outputBuffer : outputBuffers[deviceIndex])
604 	{
605 		err = clQueues[deviceIndex]->enqueueReadBuffer(
606 			*outputBuffer.clPtr, CL_FALSE, 0, outputBuffer.size(), outputBuffer.ptr.get());
607 		if (!checkErr(err, "CommandQueue::enqueueReadBuffer() for " + outputBuffer.name))
608 		{
609 			emit showErrorMessage(
610 				QObject::tr("Cannot enqueue reading OpenCL buffers %1").arg(outputBuffer.name),
611 				cErrorMessage::errorMessage, nullptr);
612 			return false;
613 		}
614 	}
615 
616 	if (deviceIndex < inputAndOutputBuffers.size())
617 	{
618 		for (auto &inputAndOutputBuffer : inputAndOutputBuffers[deviceIndex])
619 		{
620 			err = clQueues[deviceIndex]->enqueueReadBuffer(*inputAndOutputBuffer.clPtr, CL_FALSE, 0,
621 				inputAndOutputBuffer.size(), inputAndOutputBuffer.ptr.get());
622 			if (!checkErr(err, "CommandQueue::enqueueReadBuffer() for " + inputAndOutputBuffer.name))
623 			{
624 				emit showErrorMessage(
625 					QObject::tr("Cannot enqueue reading OpenCL buffers %1. \nCalculation probably took too "
626 											"long and triggered timeout error in graphics driver.")
627 						.arg(inputAndOutputBuffer.name),
628 					cErrorMessage::errorMessage, nullptr);
629 				return false;
630 			}
631 		}
632 	}
633 
634 	err = clQueues[deviceIndex]->finish();
635 	if (!checkErr(err, "CommandQueue::finish() - read buffers"))
636 	{
637 		emit showErrorMessage(
638 			QObject::tr("Cannot finish reading OpenCL output buffers\nCalculation probably took too "
639 									"long and triggered timeout error in graphics driver."),
640 			cErrorMessage::errorMessage, nullptr);
641 		return false;
642 	}
643 
644 	return true;
645 }
646 
AssignParametersToKernel(int deviceIndex)647 bool cOpenClEngine::AssignParametersToKernel(int deviceIndex)
648 {
649 	uint argIterator = 0;
650 	if (deviceIndex < inputBuffers.size())
651 	{
652 		for (auto &inputBuffer : inputBuffers[deviceIndex])
653 		{
654 			int err = clKernels[deviceIndex]->setArg(argIterator++, *inputBuffer.clPtr);
655 			if (!checkErr(
656 						err, "kernel->setArg(" + QString::number(argIterator) + ") for " + inputBuffer.name))
657 			{
658 				emit showErrorMessage(
659 					QObject::tr("Cannot set OpenCL argument for %1").arg(inputBuffer.name),
660 					cErrorMessage::errorMessage, nullptr);
661 				return false;
662 			}
663 		}
664 	}
665 	for (auto &outputBuffer : outputBuffers[deviceIndex])
666 	{
667 		int err = clKernels[deviceIndex]->setArg(argIterator++, *outputBuffer.clPtr);
668 		if (!checkErr(
669 					err, "kernel->setArg(" + QString::number(argIterator) + ") for " + outputBuffer.name))
670 		{
671 			emit showErrorMessage(QObject::tr("Cannot set OpenCL argument for %1").arg(outputBuffer.name),
672 				cErrorMessage::errorMessage, nullptr);
673 			return false;
674 		}
675 	}
676 	if (deviceIndex < inputAndOutputBuffers.size())
677 	{
678 		for (auto &inputAndOutputBuffer : inputAndOutputBuffers[deviceIndex])
679 		{
680 			int err = clKernels[deviceIndex]->setArg(argIterator++, *inputAndOutputBuffer.clPtr);
681 			if (!checkErr(err, "kernel->setArg(" + QString::number(argIterator) + ") for "
682 													 + inputAndOutputBuffer.name))
683 			{
684 				emit showErrorMessage(
685 					QObject::tr("Cannot set OpenCL argument for %1").arg(inputAndOutputBuffer.name),
686 					cErrorMessage::errorMessage, nullptr);
687 				return false;
688 			}
689 		}
690 	}
691 	return AssignParametersToKernelAdditional(argIterator, deviceIndex);
692 }
693 
AddInclude(QByteArray & program,const QString & filePath)694 void cOpenClEngine::AddInclude(QByteArray &program, const QString &filePath)
695 {
696 	program.append(QString("\n#include \"" + filePath + "\"\n").toUtf8());
697 }
698 
699 #endif
700