1 /*
2  * Copyright (C) 2018-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #include "shared/source/command_stream/command_stream_receiver_hw.h"
9 #include "shared/source/helpers/aligned_memory.h"
10 #include "shared/source/memory_manager/os_agnostic_memory_manager.h"
11 #include "shared/test/common/libult/ult_command_stream_receiver.h"
12 #include "shared/test/common/test_macros/test.h"
13 #include "shared/test/common/test_macros/test_checks_shared.h"
14 
15 #include "opencl/source/command_queue/command_queue_hw.h"
16 #include "opencl/source/helpers/cl_memory_properties_helpers.h"
17 #include "opencl/source/kernel/kernel.h"
18 #include "opencl/source/mem_obj/buffer.h"
19 #include "opencl/source/mem_obj/image.h"
20 #include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
21 #include "opencl/test/unit_test/mocks/mock_context.h"
22 
23 using namespace NEO;
24 
25 namespace ULT {
26 
27 template <typename FamilyType>
28 class CommandStreamReceiverMock : public UltCommandStreamReceiver<FamilyType> {
29   private:
30     std::vector<GraphicsAllocation *> toFree; // pointers to be freed on destruction
31     Device *pDevice;
32     ClDevice *pClDevice;
33 
34   public:
35     size_t expectedToFreeCount = (size_t)-1;
CommandStreamReceiverMock(Device * pDevice)36     CommandStreamReceiverMock(Device *pDevice) : UltCommandStreamReceiver<FamilyType>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()) {
37         this->pDevice = pDevice;
38         this->pClDevice = pDevice->getSpecializedDevice<ClDevice>();
39     }
40 
flush(BatchBuffer & batchBuffer,ResidencyContainer & allocationsForResidency)41     bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override {
42         EXPECT_NE(nullptr, batchBuffer.commandBufferAllocation->getUnderlyingBuffer());
43 
44         toFree.push_back(batchBuffer.commandBufferAllocation);
45         batchBuffer.stream->replaceBuffer(nullptr, 0);
46         batchBuffer.stream->replaceGraphicsAllocation(nullptr);
47 
48         EXPECT_TRUE(this->ownershipMutex.try_lock());
49         this->ownershipMutex.unlock();
50         return true;
51     }
52 
~CommandStreamReceiverMock()53     ~CommandStreamReceiverMock() override {
54         EXPECT_FALSE(pClDevice->hasOwnership());
55         if (expectedToFreeCount == (size_t)-1) {
56             EXPECT_GT(toFree.size(), 0u); //make sure flush was called
57         } else {
58             EXPECT_EQ(toFree.size(), expectedToFreeCount);
59         }
60 
61         auto memoryManager = this->getMemoryManager();
62         //Now free memory. if CQ/CSR did the same, we will hit double-free
63         for (auto p : toFree)
64             memoryManager->freeGraphicsMemory(p);
65     }
66 };
67 
68 struct EnqueueThreadingFixture : public ClDeviceFixture {
SetUpULT::EnqueueThreadingFixture69     void SetUp() {
70         ClDeviceFixture::SetUp();
71         context = new MockContext(pClDevice);
72         pCmdQ = nullptr;
73     }
74 
TearDownULT::EnqueueThreadingFixture75     void TearDown() {
76         delete pCmdQ;
77         context->release();
78         ClDeviceFixture::TearDown();
79     }
80 
81     template <typename FamilyType>
82     class MyCommandQueue : public CommandQueueHw<FamilyType> {
83       public:
MyCommandQueue(Context * context,ClDevice * device,const cl_queue_properties * props)84         MyCommandQueue(Context *context,
85                        ClDevice *device,
86                        const cl_queue_properties *props) : CommandQueueHw<FamilyType>(context, device, props, false), kernel(nullptr) {
87         }
88 
create(Context * context,ClDevice * device,cl_command_queue_properties props)89         static CommandQueue *create(Context *context,
90                                     ClDevice *device,
91                                     cl_command_queue_properties props) {
92             const cl_queue_properties properties[3] = {CL_QUEUE_PROPERTIES, props, 0};
93             return new MyCommandQueue<FamilyType>(context, device, properties);
94         }
95 
96       protected:
~MyCommandQueue()97         ~MyCommandQueue() override {
98             if (kernel) {
99                 EXPECT_FALSE(kernel->getMultiDeviceKernel()->hasOwnership());
100             }
101         }
enqueueHandlerHook(const unsigned int commandType,const MultiDispatchInfo & multiDispatchInfo)102         void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &multiDispatchInfo) override {
103             for (auto &dispatchInfo : multiDispatchInfo) {
104                 auto &kernel = *dispatchInfo.getKernel();
105                 EXPECT_TRUE(kernel.getMultiDeviceKernel()->hasOwnership());
106             }
107         }
108 
109         Kernel *kernel;
110     };
111 
112     CommandQueue *pCmdQ;
113     MockContext *context;
114 
115     template <typename FamilyType>
createCQULT::EnqueueThreadingFixture116     void createCQ() {
117         pCmdQ = MyCommandQueue<FamilyType>::create(context, pClDevice, 0);
118         ASSERT_NE(nullptr, pCmdQ);
119 
120         auto pCommandStreamReceiver = new CommandStreamReceiverMock<FamilyType>(pDevice);
121         pDevice->resetCommandStreamReceiver(pCommandStreamReceiver);
122     }
123 };
124 
125 typedef Test<EnqueueThreadingFixture> EnqueueThreading;
126 
127 struct EnqueueThreadingImage : EnqueueThreading {
SetUpULT::EnqueueThreadingImage128     void SetUp() override {
129         REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
130         EnqueueThreading::SetUp();
131     }
TearDownULT::EnqueueThreadingImage132     void TearDown() override {
133         if (!IsSkipped()) {
134             EnqueueThreading::TearDown();
135         }
136     }
137 };
138 
// Enqueues a blocking buffer read with CPU copy disallowed; the ownership
// expectations live in the queue and CSR installed by createCQ().
HWTEST_F(EnqueueThreading, WhenEnqueuingReadBufferThenKernelHasOwnership) {
    createCQ<FamilyType>();

    constexpr auto sizeInBytes = 1024u;

    cl_int retVal;
    std::unique_ptr<Buffer> buffer(Buffer::create(context, CL_MEM_READ_WRITE, sizeInBytes, nullptr, retVal));
    ASSERT_NE(nullptr, buffer.get());

    void *hostMemory = ::alignedMalloc(sizeInBytes, 4096);
    ASSERT_NE(nullptr, hostMemory);

    buffer->forceDisallowCPUCopy = true;
    pCmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, sizeInBytes, hostMemory, nullptr, 0, nullptr, nullptr);

    ::alignedFree(hostMemory);
}
162 
// Enqueues a blocking buffer write with CPU copy disallowed; the ownership
// expectations live in the queue and CSR installed by createCQ().
HWTEST_F(EnqueueThreading, WhenEnqueuingWriteBufferThenKernelHasOwnership) {
    createCQ<FamilyType>();

    constexpr auto sizeInBytes = 1024u;

    cl_int retVal;
    std::unique_ptr<Buffer> buffer(Buffer::create(context, CL_MEM_READ_WRITE, sizeInBytes, nullptr, retVal));
    ASSERT_NE(nullptr, buffer.get());

    void *hostMemory = ::alignedMalloc(sizeInBytes, 4096);
    ASSERT_NE(nullptr, hostMemory);

    buffer->forceDisallowCPUCopy = true;
    pCmdQ->enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, sizeInBytes, hostMemory, nullptr, 0, nullptr, nullptr);

    ::alignedFree(hostMemory);
}
186 
// Enqueues a buffer-to-buffer copy of the full 1024 bytes through the
// checking queue and CSR installed by createCQ().
HWTEST_F(EnqueueThreading, WhenEnqueuingCopyBufferThenKernelHasOwnership) {
    createCQ<FamilyType>();

    constexpr auto sizeInBytes = 1024u;
    cl_int retVal;

    std::unique_ptr<Buffer> source(Buffer::create(context, CL_MEM_READ_WRITE, sizeInBytes, nullptr, retVal));
    ASSERT_NE(nullptr, source.get());

    std::unique_ptr<Buffer> destination(Buffer::create(context, CL_MEM_READ_WRITE, sizeInBytes, nullptr, retVal));
    ASSERT_NE(nullptr, destination.get());

    pCmdQ->enqueueCopyBuffer(source.get(), destination.get(), 0, 0, sizeInBytes, 0, nullptr, nullptr);
}
198 
// Enqueues a rectangular buffer-to-buffer copy (all pitches left at zero)
// through the checking queue and CSR installed by createCQ().
HWTEST_F(EnqueueThreading, WhenEnqueuingCopyBufferRectThenKernelHasOwnership) {
    createCQ<FamilyType>();

    constexpr auto sizeInBytes = 1024u;
    cl_int retVal;

    std::unique_ptr<Buffer> source(Buffer::create(context, CL_MEM_READ_WRITE, sizeInBytes, nullptr, retVal));
    ASSERT_NE(nullptr, source.get());

    std::unique_ptr<Buffer> destination(Buffer::create(context, CL_MEM_READ_WRITE, sizeInBytes, nullptr, retVal));
    ASSERT_NE(nullptr, destination.get());

    size_t region[3] = {1024u, 1, 1};
    size_t srcOrigin[3] = {1024u, 1, 0};
    size_t dstOrigin[3] = {1024u, 1, 0};

    pCmdQ->enqueueCopyBufferRect(source.get(), destination.get(), srcOrigin, dstOrigin, region,
                                 0, 0, 0, 0, 0, nullptr, nullptr);
}
214 
// Enqueues a copy from a 1024-byte buffer into a write-only 1D image
// (CL_R / CL_UNORM_INT8, width 1024) through the checking queue and CSR.
HWTEST_F(EnqueueThreadingImage, WhenEnqueuingCopyBufferToImageThenKernelHasOwnership) {
    createCQ<FamilyType>();
    cl_int retVal;

    std::unique_ptr<Buffer> source(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal));
    ASSERT_NE(nullptr, source.get());

    cl_image_format format;
    format.image_channel_order = CL_R;
    format.image_channel_data_type = CL_UNORM_INT8;

    cl_image_desc desc;
    memset(&desc, 0, sizeof(desc));
    desc.image_width = 1024u;
    desc.image_type = CL_MEM_OBJECT_IMAGE1D;

    cl_mem_flags flags = CL_MEM_WRITE_ONLY;
    auto *clDevice = context->getDevice(0);
    auto surfaceFormat = Image::getSurfaceFormatFromTable(
        flags, &format, clDevice->getHardwareInfo().capabilityTable.supportsOcl21Features);
    auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &clDevice->getDevice());

    std::unique_ptr<Image> destination(
        Image::create(context, memoryProperties, flags, 0, surfaceFormat, &desc, nullptr, retVal));
    ASSERT_NE(nullptr, destination.get());

    size_t region[3] = {1024u, 1, 1};
    size_t dstOrigin[3] = {1024u, 1, 0};

    pCmdQ->enqueueCopyBufferToImage(source.get(), destination.get(), 0, dstOrigin, region, 0, nullptr, nullptr);
}
244 
// Enqueues an image-to-image copy between two write-only 1D images
// (CL_R / CL_UNORM_INT8, width 1024) through the checking queue and CSR
// installed by createCQ().
HWTEST_F(EnqueueThreadingImage, WhenEnqueuingCopyImageThenKernelHasOwnership) {
    createCQ<FamilyType>();
    cl_int retVal;

    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_image_desc imageDesc;
    memset(&imageDesc, 0, sizeof(imageDesc));

    imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D;
    imageDesc.image_width = 1024u;
    cl_mem_flags flags = CL_MEM_WRITE_ONLY;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(
        flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);
    std::unique_ptr<Image> srcImage(
        Image::create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()),
                      flags, 0, surfaceFormat, &imageDesc, nullptr, retVal));
    ASSERT_NE(nullptr, srcImage.get());
    std::unique_ptr<Image> dstImage(
        Image::create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()),
                      flags, 0, surfaceFormat, &imageDesc, nullptr, retVal));
    // Check the destination image that was just created (not the source
    // again), so a failed creation stops the test before it is dereferenced.
    ASSERT_NE(nullptr, dstImage.get());

    size_t srcOrigin[3] = {1024u, 1, 0};
    size_t dstOrigin[3] = {1024u, 1, 0};
    size_t region[3] = {1024u, 1, 1};

    pCmdQ->enqueueCopyImage(srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, region, 0, nullptr, nullptr);
}
276 
// Enqueues a copy from a write-only 1D image (CL_R / CL_UNORM_INT8,
// width 1024) into a 1024-byte buffer through the checking queue and CSR.
HWTEST_F(EnqueueThreadingImage, WhenEnqueuingCopyImageToBufferThenKernelHasOwnership) {
    createCQ<FamilyType>();
    cl_int retVal;

    cl_image_format format;
    format.image_channel_order = CL_R;
    format.image_channel_data_type = CL_UNORM_INT8;

    cl_image_desc desc;
    memset(&desc, 0, sizeof(desc));
    desc.image_width = 1024u;
    desc.image_type = CL_MEM_OBJECT_IMAGE1D;

    cl_mem_flags flags = CL_MEM_WRITE_ONLY;
    auto *clDevice = context->getDevice(0);
    auto surfaceFormat = Image::getSurfaceFormatFromTable(
        flags, &format, clDevice->getHardwareInfo().capabilityTable.supportsOcl21Features);
    auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &clDevice->getDevice());

    std::unique_ptr<Image> source(
        Image::create(context, memoryProperties, flags, 0, surfaceFormat, &desc, nullptr, retVal));
    ASSERT_NE(nullptr, source.get());

    std::unique_ptr<Buffer> destination(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal));
    ASSERT_NE(nullptr, destination.get());

    size_t region[3] = {1024u, 1, 1};
    size_t srcOrigin[3] = {1024u, 1, 0};

    pCmdQ->enqueueCopyImageToBuffer(source.get(), destination.get(), srcOrigin, region, 0, 0, nullptr, nullptr);
}
307 
// Enqueues a fill of a 1024-byte buffer with a 4-byte pattern through the
// checking queue and CSR installed by createCQ().
HWTEST_F(EnqueueThreading, WhenEnqueuingFillBufferThenKernelHasOwnership) {
    createCQ<FamilyType>();
    cl_int retVal;

    std::unique_ptr<Buffer> buffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal));
    ASSERT_NE(nullptr, buffer.get());

    // Unsigned type: 0xDEADBEEF does not fit in a signed 32-bit cl_int, so the
    // previous cl_int initialization relied on an implicit narrowing conversion.
    // Size and byte pattern passed to the fill are unchanged.
    cl_uint pattern = 0xDEADBEEFu;
    pCmdQ->enqueueFillBuffer(buffer.get(), &pattern, sizeof(pattern), 0, 1024u, 0, nullptr, nullptr);
}
318 
// Enqueues a fill of a write-only 1D image (CL_R / CL_UNORM_INT8,
// width 1024) with a four-component colour through the checking queue and CSR.
HWTEST_F(EnqueueThreadingImage, WhenEnqueuingFillImageThenKernelHasOwnership) {
    createCQ<FamilyType>();
    cl_int retVal;

    cl_image_format format;
    format.image_channel_order = CL_R;
    format.image_channel_data_type = CL_UNORM_INT8;

    cl_image_desc desc;
    memset(&desc, 0, sizeof(desc));
    desc.image_width = 1024u;
    desc.image_type = CL_MEM_OBJECT_IMAGE1D;

    cl_mem_flags flags = CL_MEM_WRITE_ONLY;
    auto *clDevice = context->getDevice(0);
    auto surfaceFormat = Image::getSurfaceFormatFromTable(
        flags, &format, clDevice->getHardwareInfo().capabilityTable.supportsOcl21Features);
    auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &clDevice->getDevice());

    std::unique_ptr<Image> image(
        Image::create(context, memoryProperties, flags, 0, surfaceFormat, &desc, nullptr, retVal));
    ASSERT_NE(nullptr, image.get());

    int32_t fillColor[4] = {0xCC, 0xCC, 0xCC, 0xCC};
    size_t region[3] = {1024u, 1, 1};
    size_t origin[3] = {1024u, 1, 0};

    pCmdQ->enqueueFillImage(image.get(), &fillColor, origin, region, 0, nullptr, nullptr);
}
348 
// Enqueues a blocking rectangular buffer read (all pitches left at zero)
// through the checking queue and CSR installed by createCQ().
HWTEST_F(EnqueueThreading, WhenEnqueuingReadBufferRectThenKernelHasOwnership) {
    createCQ<FamilyType>();
    cl_int retVal;

    std::unique_ptr<Buffer> buffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal));
    ASSERT_NE(nullptr, buffer.get());

    size_t bufferOrigin[3] = {1024u, 1, 0};
    size_t hostOrigin[3] = {1024u, 1, 0};
    size_t region[3] = {1024u, 1, 1};

    // Size the host allocation from the host origin and region, the same way
    // the WriteBufferRect test does, instead of a fixed 1024 bytes: the host
    // origin alone already starts 1024 bytes into the host memory.
    auto hostPtrSize = Buffer::calculateHostPtrSize(hostOrigin, region, 0, 0);
    void *ptr = ::alignedMalloc(hostPtrSize, MemoryConstants::pageSize);
    ASSERT_NE(nullptr, ptr);

    pCmdQ->enqueueReadBufferRect(buffer.get(), CL_TRUE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, nullptr, nullptr);

    ::alignedFree(ptr);
}
367 
// Enqueues a blocking read from a write-only 1D image (CL_R / CL_UNORM_INT8,
// width 1024) into host memory through the checking queue and CSR.
HWTEST_F(EnqueueThreadingImage, WhenEnqueuingReadImageThenKernelHasOwnership) {
    createCQ<FamilyType>();
    cl_int retVal;

    cl_image_format format;
    format.image_channel_order = CL_R;
    format.image_channel_data_type = CL_UNORM_INT8;

    cl_image_desc desc;
    memset(&desc, 0, sizeof(desc));
    desc.image_width = 1024u;
    desc.image_type = CL_MEM_OBJECT_IMAGE1D;

    cl_mem_flags flags = CL_MEM_WRITE_ONLY;
    auto *clDevice = context->getDevice(0);
    auto surfaceFormat = Image::getSurfaceFormatFromTable(
        flags, &format, clDevice->getHardwareInfo().capabilityTable.supportsOcl21Features);
    auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &clDevice->getDevice());

    std::unique_ptr<Image> image(
        Image::create(context, memoryProperties, flags, 0, surfaceFormat, &desc, nullptr, retVal));
    ASSERT_NE(nullptr, image.get());

    void *hostMemory = ::alignedMalloc(1024u, 4096);
    ASSERT_NE(nullptr, hostMemory);

    size_t region[3] = {1024u, 1, 1};
    size_t origin[3] = {1024u, 1, 0};

    pCmdQ->enqueueReadImage(image.get(), CL_TRUE, origin, region, 0, 0, hostMemory, nullptr, 0, nullptr, nullptr);

    ::alignedFree(hostMemory);
}
400 
// Enqueues a blocking rectangular buffer write (all pitches left at zero);
// the host memory is sized with Buffer::calculateHostPtrSize for the given
// host origin and region.
HWTEST_F(EnqueueThreading, WhenEnqueuingWriteBufferRectThenKernelHasOwnership) {
    createCQ<FamilyType>();
    cl_int retVal;

    std::unique_ptr<Buffer> buffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal));
    ASSERT_NE(nullptr, buffer.get());

    size_t region[3] = {1024u, 1, 1};
    size_t hostOrigin[3] = {1024u, 1, 0};
    size_t bufferOrigin[3] = {1024u, 1, 0};

    auto requiredHostBytes = Buffer::calculateHostPtrSize(hostOrigin, region, 0, 0);
    void *hostMemory = ::alignedMalloc(requiredHostBytes, MemoryConstants::pageSize);
    ASSERT_NE(nullptr, hostMemory);

    pCmdQ->enqueueWriteBufferRect(buffer.get(), CL_TRUE, bufferOrigin, hostOrigin, region,
                                  0, 0, 0, 0, hostMemory, 0, nullptr, nullptr);

    ::alignedFree(hostMemory);
}
420 
// Enqueues a blocking write from host memory into a read-only 1D image
// (CL_R / CL_UNORM_INT8, width 1024) through the checking queue and CSR.
HWTEST_F(EnqueueThreadingImage, WhenEnqueuingWriteImageThenKernelHasOwnership) {
    createCQ<FamilyType>();
    cl_int retVal;

    cl_image_format format;
    format.image_channel_order = CL_R;
    format.image_channel_data_type = CL_UNORM_INT8;

    cl_image_desc desc;
    memset(&desc, 0, sizeof(desc));
    desc.image_width = 1024u;
    desc.image_type = CL_MEM_OBJECT_IMAGE1D;

    cl_mem_flags flags = CL_MEM_READ_ONLY;
    auto *clDevice = context->getDevice(0);
    auto surfaceFormat = Image::getSurfaceFormatFromTable(
        flags, &format, clDevice->getHardwareInfo().capabilityTable.supportsOcl21Features);
    auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &clDevice->getDevice());

    std::unique_ptr<Image> image(
        Image::create(context, memoryProperties, flags, 0, surfaceFormat, &desc, nullptr, retVal));
    ASSERT_NE(nullptr, image.get());

    void *hostMemory = ::alignedMalloc(1024u, 4096);
    ASSERT_NE(nullptr, hostMemory);

    size_t region[3] = {1024u, 1, 1};
    size_t origin[3] = {1024u, 1, 0};

    pCmdQ->enqueueWriteImage(image.get(), CL_TRUE, origin, region, 0, 0, hostMemory, nullptr, 0, nullptr, nullptr);

    ::alignedFree(hostMemory);
}
453 
// Calls finish() on a queue whose task count/level and the CSR's sent and
// flushed task counts are all preset to 1; the mock CSR is told to expect
// zero captured flushes.
HWTEST_F(EnqueueThreading, WhenFinishingThenKernelHasOwnership) {
    createCQ<FamilyType>();

    // createCQ() installed a CommandStreamReceiverMock, so the downcast is valid.
    auto &mockCsr = static_cast<CommandStreamReceiverMock<FamilyType> &>(pCmdQ->getGpgpuCommandStreamReceiver());

    // set something to finish
    pCmdQ->taskCount = 1;
    pCmdQ->taskLevel = 1;

    mockCsr.expectedToFreeCount = 0u;
    mockCsr.latestSentTaskCount = 1;
    mockCsr.latestFlushedTaskCount = 1;

    pCmdQ->finish();
}
467 } // namespace ULT
468