1 /*
2 * Copyright (C) 2018-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8 #include "shared/source/helpers/local_work_size.h"
9 #include "shared/source/memory_manager/unified_memory_manager.h"
10 #include "shared/test/common/helpers/debug_manager_state_restore.h"
11
12 #include "opencl/source/event/user_event.h"
13 #include "opencl/test/unit_test/context/driver_diagnostics_tests.h"
14 #include "opencl/test/unit_test/fixtures/buffer_fixture.h"
15 #include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
16
17 using namespace NEO;
18
// Blocking read into freshly allocated host memory, with forceDisallowCPUCopy cleared:
// expects the READ_BUFFER_REQUIRES_COPY_DATA hint formatted with (buffer, host pointer).
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesProperHint) {
    const auto cacheLine = MemoryConstants::cacheLineSize;
    void *hostPtr = alignedMalloc(2 * cacheLine, cacheLine);
    buffer->forceDisallowCPUCopy = false;

    pCmdQ->enqueueReadBuffer(buffer, CL_TRUE, 0, cacheLine, hostPtr, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer), hostPtr);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(hostPtr);
}
37
// Non-blocking read whose host address and/or size are offset by one when the
// alignedAddress / alignedSize flags are false. The copy-required hint is always expected;
// the alignment-restriction hint is expected exactly when at least one flag is false.
TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadBufferIsCallingThenContextProvidesHintsAboutAlignments) {
    const auto cacheLine = MemoryConstants::cacheLineSize;
    void *hostMemory = alignedMalloc(2 * cacheLine, cacheLine);

    uintptr_t hostAddress = reinterpret_cast<uintptr_t>(hostMemory);
    hostAddress += alignedAddress ? 0u : 1u;
    size_t readSize = cacheLine;
    readSize -= alignedSize ? 0u : 1u;
    const bool expectAlignmentHint = !(alignedSize && alignedAddress);

    pCmdQ->enqueueReadBuffer(buffer, CL_FALSE, 0, readSize, reinterpret_cast<void *>(hostAddress), nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer), hostAddress);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS],
             hostAddress, readSize, MemoryConstants::pageSize, MemoryConstants::pageSize);
    EXPECT_EQ(expectAlignmentHint, containsHint(expectedHint, userData));

    alignedFree(hostMemory);
}
63
// Rect variant of the alignment test: a blocking enqueueReadBufferRect with a one-row region.
// The host address and/or region width are offset by one when the corresponding flag is false.
TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadBufferRectIsCallingThenContextProvidesHintsAboutAlignments) {
    const auto cacheLine = MemoryConstants::cacheLineSize;
    void *hostMemory = alignedMalloc(2 * cacheLine, cacheLine);

    uintptr_t hostAddress = reinterpret_cast<uintptr_t>(hostMemory);
    hostAddress += alignedAddress ? 0u : 1u;
    size_t rowSize = cacheLine;
    rowSize -= alignedSize ? 0u : 1u;
    const bool expectAlignmentHint = !(alignedSize && alignedAddress);

    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {rowSize, 1, 1};

    // All four pitch arguments are passed as zero.
    pCmdQ->enqueueReadBufferRect(buffer, CL_TRUE, bufferOrigin, hostOrigin, region,
                                 0, 0, 0, 0,
                                 reinterpret_cast<void *>(hostAddress), 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer), hostAddress);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS],
             hostAddress, rowSize, MemoryConstants::pageSize, MemoryConstants::pageSize);
    EXPECT_EQ(expectAlignmentHint, containsHint(expectedHint, userData));

    alignedFree(hostMemory);
}
99
// Non-blocking rect read into a separately allocated host pointer:
// expects the READ_BUFFER_RECT_REQUIRES_COPY_DATA hint for (buffer, host pointer).
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndNotSharedMemWhenEnqueueReadBufferRectIsCallingThenContextProvidesProperHint) {
    const auto pitch = MemoryConstants::cacheLineSize;
    void *hostPtr = alignedMalloc(2 * pitch, pitch);

    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 2, 1};

    // The same cache-line-sized value is used for all four pitch arguments.
    pCmdQ->enqueueReadBufferRect(buffer, CL_FALSE, bufferOrigin, hostOrigin, region,
                                 pitch, pitch, pitch, pitch,
                                 hostPtr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer), hostPtr);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(hostPtr);
}
126
// Non-blocking rect read targeting the fixture's `address` pointer
// (presumably the memory the fixture buffer shares with the host - confirm in the fixture):
// expects the READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA hint for (buffer, address).
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndSharedMemWhenEnqueueReadBufferRectIsCallingThenContextProvidesProperHint) {
    const auto pitch = MemoryConstants::cacheLineSize;

    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 2, 1};

    pCmdQ->enqueueReadBufferRect(buffer, CL_FALSE, bufferOrigin, hostOrigin, region,
                                 pitch, pitch, pitch, pitch,
                                 address, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer), address);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
151
// Non-blocking write from a separately allocated host pointer with forceDisallowCPUCopy set:
// expects the WRITE_BUFFER_REQUIRES_COPY_DATA hint for the buffer.
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndBufferDoesntShareMemWithCPUWhenEnqueueWriteBufferIsCallingWithoutCPUCopyThenContextProvidesRequiedCopyHint) {
    const auto cacheLine = MemoryConstants::cacheLineSize;
    void *hostPtr = alignedMalloc(2 * cacheLine, cacheLine);
    buffer->forceDisallowCPUCopy = true;

    pCmdQ->enqueueWriteBuffer(buffer, CL_FALSE, 0, cacheLine, hostPtr, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer));
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(hostPtr);
}
170
// Non-blocking write from the fixture's `address` pointer with forceDisallowCPUCopy set:
// expects the WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA hint for (buffer, address).
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndBufferSharesMemWithCPUWhenEnqueueWriteBufferIsCallingWithoutCPUCopyThenContextProvidesCopyDoenstRequiedHint) {
    const auto writeSize = MemoryConstants::cacheLineSize;
    buffer->forceDisallowCPUCopy = true;

    pCmdQ->enqueueWriteBuffer(buffer, CL_FALSE, 0, writeSize, address, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA],
             static_cast<cl_mem>(buffer), address);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
187
// Blocking write from a separately allocated host pointer with forceDisallowCPUCopy cleared:
// expects the WRITE_BUFFER_REQUIRES_COPY_DATA hint for the buffer.
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingWriteAndBufferDoesntShareMemWithCPUWhenEnqueueWriteBufferIsCallingWithCPUCopyThenContextProvidesRequiedCopyHint) {
    const auto cacheLine = MemoryConstants::cacheLineSize;
    void *hostPtr = alignedMalloc(2 * cacheLine, cacheLine);
    buffer->forceDisallowCPUCopy = false;

    pCmdQ->enqueueWriteBuffer(buffer, CL_TRUE, 0, cacheLine, hostPtr, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer));
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(hostPtr);
}
206
// Blocking write from the fixture's `address` pointer with forceDisallowCPUCopy cleared:
// expects the WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA hint for (buffer, address).
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingWriteAndBufferSharesMemWithCPUWhenEnqueueWriteBufferIsCallingWithCPUCopyThenContextProvidesCopyDoenstRequiedHint) {
    const auto writeSize = MemoryConstants::cacheLineSize;
    buffer->forceDisallowCPUCopy = false;

    pCmdQ->enqueueWriteBuffer(buffer, CL_TRUE, 0, writeSize, address, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA],
             static_cast<cl_mem>(buffer), address);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
223
// Non-blocking read into a separately allocated host pointer with forceDisallowCPUCopy set:
// expects the READ_BUFFER_REQUIRES_COPY_DATA hint for (buffer, host pointer).
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndBufferDoesntShareMemWithCPUWhenEnqueueReadBufferIsCallingWithoutCPUCopyThenContextProvidesRequiedCopyHint) {
    const auto cacheLine = MemoryConstants::cacheLineSize;
    void *hostPtr = alignedMalloc(2 * cacheLine, cacheLine);
    buffer->forceDisallowCPUCopy = true;

    pCmdQ->enqueueReadBuffer(buffer, CL_FALSE, 0, cacheLine, hostPtr, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer), hostPtr);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(hostPtr);
}
242
// Non-blocking read into the fixture's `address` pointer with forceDisallowCPUCopy set:
// expects the READ_BUFFER_DOESNT_REQUIRE_COPY_DATA hint for (buffer, address).
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndBufferSharesMemWithCPUWhenEnqueueReadBufferIsCallingWithoutCPUCopyThenContextProvidesCopyDoenstRequiedHint) {
    const auto readSize = MemoryConstants::cacheLineSize;
    buffer->forceDisallowCPUCopy = true;

    pCmdQ->enqueueReadBuffer(buffer, CL_FALSE, 0, readSize, address, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA],
             static_cast<cl_mem>(buffer), address);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
259
// Blocking read into a separately allocated host pointer with forceDisallowCPUCopy cleared:
// expects the READ_BUFFER_REQUIRES_COPY_DATA hint for (buffer, host pointer).
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadAndBufferDoesntShareMemWithCPUWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesRequiedCopyHint) {
    const auto cacheLine = MemoryConstants::cacheLineSize;
    void *hostPtr = alignedMalloc(2 * cacheLine, cacheLine);
    buffer->forceDisallowCPUCopy = false;

    pCmdQ->enqueueReadBuffer(buffer, CL_TRUE, 0, cacheLine, hostPtr, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer), hostPtr);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(hostPtr);
}
278
// Blocking read into the fixture's `address` pointer with forceDisallowCPUCopy cleared:
// expects the READ_BUFFER_DOESNT_REQUIRE_COPY_DATA hint for (buffer, address).
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadAndBufferSharesMemWithCPUWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesCopyDoenstRequiedHint) {
    const auto readSize = MemoryConstants::cacheLineSize;
    buffer->forceDisallowCPUCopy = false;

    pCmdQ->enqueueReadBuffer(buffer, CL_TRUE, 0, readSize, address, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA],
             static_cast<cl_mem>(buffer), address);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
295
// Non-blocking rect write from a separately allocated host pointer:
// expects the WRITE_BUFFER_RECT_REQUIRES_COPY_DATA hint for the buffer.
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndNotSharedMemWhenEnqueueWriteBufferRectIsCallingThenContextProvidesProperHint) {
    const auto pitch = MemoryConstants::cacheLineSize;
    void *hostPtr = alignedMalloc(2 * pitch, pitch);

    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 2, 1};

    // The same cache-line-sized value is used for all four pitch arguments.
    pCmdQ->enqueueWriteBufferRect(buffer, CL_FALSE, bufferOrigin, hostOrigin, region,
                                  pitch, pitch, pitch, pitch,
                                  hostPtr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer));
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(hostPtr);
}
322
// Non-blocking rect write from the fixture's `address` pointer:
// expects the WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA hint for the buffer.
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndSharedMemWhenEnqueueWriteBufferRectIsCallingThenContextProvidesProperHint) {
    const auto pitch = MemoryConstants::cacheLineSize;

    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 2, 1};

    pCmdQ->enqueueWriteBufferRect(buffer, CL_FALSE, bufferOrigin, hostOrigin, region,
                                  pitch, pitch, pitch, pitch,
                                  address, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA],
             static_cast<cl_mem>(buffer));
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
347
// Non-blocking image read whose host address and/or row width (in pixels) are offset by one
// when the alignedAddress / alignedSize flags are false. The alignment-restriction hint is
// expected exactly when at least one of the flags is false.
TEST_P(PerformanceHintEnqueueReadImageTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadImageIsCallingThenContextProvidesHintsAboutAlignments) {
    REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);

    // Owning handle for the host allocation: the fatal ASSERT_EQ below returns from the test
    // body on failure, which would skip a trailing manual alignedFree() and leak the memory.
    auto releaseAligned = [](void *memory) { alignedFree(memory); };
    std::unique_ptr<void, decltype(releaseAligned)> hostMemory(
        alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize), releaseAligned);

    size_t hostOrigin[] = {0, 0, 0};
    uintptr_t addressForReadImage = reinterpret_cast<uintptr_t>(hostMemory.get());
    size_t sizeForReadImageInPixels = MemoryConstants::cacheLineSize;
    const bool hintWithMisalignment = !(alignedAddress && alignedSize);
    if (!alignedAddress) {
        addressForReadImage++;
    }
    if (!alignedSize) {
        sizeForReadImageInPixels--;
    }
    size_t region[] = {sizeForReadImageInPixels, 1, 1};

    pCmdQ->enqueueReadImage(image, CL_FALSE, hostOrigin, region, 0, 0,
                            reinterpret_cast<void *>(addressForReadImage),
                            nullptr, 0, nullptr, nullptr);

    // The hint reports the size in bytes, so scale the pixel count by the element size and make
    // sure the scaled size still has the alignment the test case asked for.
    size_t sizeForReadImage = sizeForReadImageInPixels * image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes;
    ASSERT_EQ(alignedSize, isAligned<MemoryConstants::cacheLineSize>(sizeForReadImage));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS],
             addressForReadImage, sizeForReadImage, MemoryConstants::pageSize, MemoryConstants::pageSize);
    EXPECT_EQ(hintWithMisalignment, containsHint(expectedHint, userData));
}
381
// Non-blocking single-pixel image write from the fixture's `address` pointer:
// expects the WRITE_IMAGE_REQUIRES_COPY_DATA hint for the fixture image.
TEST_F(PerformanceHintEnqueueImageTest, GivenNonBlockingWriteWhenEnqueueWriteImageIsCallingThenContextProvidesProperHint) {
    const auto pitch = MemoryConstants::cacheLineSize;
    size_t origin[] = {0, 0, 0};
    size_t singlePixel[] = {1, 1, 1};

    pCmdQ->enqueueWriteImage(image, CL_FALSE, origin, singlePixel, pitch, pitch, address, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(image));
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
403
// Non-blocking single-pixel write to zeroCopyImage, sourcing from the image's own
// getCpuAddressForMemoryTransfer() pointer: expects the WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA hint.
TEST_F(PerformanceHintEnqueueImageTest, GivenNonBlockingWriteImageSharesStorageWithDstPtrWhenEnqueueWriteImageIsCallingThenContextProvidesProperHint) {
    const auto pitch = MemoryConstants::cacheLineSize;
    size_t origin[] = {0, 0, 0};
    size_t singlePixel[] = {1, 1, 1};
    void *imageStorage = zeroCopyImage->getCpuAddressForMemoryTransfer();

    pCmdQ->enqueueWriteImage(zeroCopyImage.get(), CL_FALSE, origin, singlePixel, pitch, pitch, imageStorage, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(zeroCopyImage.get()));
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
426
// Non-blocking single-pixel read from zeroCopyImage into the image's own
// getCpuAddressForMemoryTransfer() pointer: expects the READ_IMAGE_DOESNT_REQUIRES_COPY_DATA hint.
TEST_F(PerformanceHintEnqueueImageTest, GivenNonBlockingReadImageSharesStorageWithDstPtrWhenEnqueueReadImageIsCallingThenContextProvidesProperHint) {
    const auto pitch = MemoryConstants::cacheLineSize;
    size_t origin[] = {0, 0, 0};
    size_t singlePixel[] = {1, 1, 1};
    void *imageStorage = zeroCopyImage->getCpuAddressForMemoryTransfer();

    pCmdQ->enqueueReadImage(zeroCopyImage.get(), CL_FALSE, origin, singlePixel, pitch, pitch, imageStorage, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(zeroCopyImage.get()));
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
449
// Maps a CL_MEM_USE_HOST_PTR buffer (non-blocking) and checks that exactly one of the
// MAP_BUFFER "doesn't require copy" / "requires copy" hints is reported, selected by the
// boolean test parameter.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapBufferIsCallingThenContextProvidesProperHint) {
    const bool zeroCopyBuffer = GetParam();
    const auto cacheLine = MemoryConstants::cacheLineSize;
    // The non-zero-copy case uses a size one byte past a cache line
    // (the test relies on this making the buffer non-zero-copy).
    size_t sizeForBuffer = cacheLine;
    if (!zeroCopyBuffer) {
        sizeForBuffer++;
    }

    // hostMemory is declared before the buffer so that it is released after the buffer that
    // wraps it has been destroyed, including on the early return taken by a failed ASSERT.
    auto releaseAligned = [](void *memory) { alignedFree(memory); };
    std::unique_ptr<void, decltype(releaseAligned)> hostMemory(alignedMalloc(2 * cacheLine, cacheLine), releaseAligned);
    auto buffer = std::unique_ptr<Buffer>(Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, hostMemory.get(), retVal));
    ASSERT_NE(nullptr, buffer.get());

    pCmdQ->enqueueMapBuffer(buffer.get(), CL_FALSE, 0, 0, cacheLine, 0, nullptr, nullptr, retVal);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA],
             static_cast<cl_mem>(buffer.get()));
    EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer.get()));
    EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData));
}
// Same as the plain map-buffer test, but the map is enqueued behind a user event: the queue
// must report blocked until the event is set to CL_COMPLETE, after which the expected
// MAP_BUFFER hint (selected by the boolean test parameter) must have been reported.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagAndBlockingEventWhenEnqueueMapBufferIsCallingThenContextProvidesProperHint) {
    const bool zeroCopyBuffer = GetParam();
    const auto cacheLine = MemoryConstants::cacheLineSize;
    UserEvent userEvent(context);
    cl_event blockedEvent = &userEvent;
    size_t sizeForBuffer = cacheLine;
    if (!zeroCopyBuffer) {
        sizeForBuffer++;
    }

    // hostMemory is declared before the buffer so that it is released after the buffer that
    // wraps it has been destroyed, including on the early return taken by a failed ASSERT.
    auto releaseAligned = [](void *memory) { alignedFree(memory); };
    std::unique_ptr<void, decltype(releaseAligned)> hostMemory(alignedMalloc(2 * cacheLine, cacheLine), releaseAligned);
    auto buffer = std::unique_ptr<Buffer>(Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, hostMemory.get(), retVal));
    ASSERT_NE(nullptr, buffer.get());

    EXPECT_EQ(buffer->isMemObjZeroCopy(), zeroCopyBuffer);

    pCmdQ->enqueueMapBuffer(buffer.get(), CL_FALSE, 0, 0, cacheLine, 1, &blockedEvent, nullptr, retVal);
    EXPECT_TRUE(pCmdQ->isQueueBlocked());
    userEvent.setStatus(CL_COMPLETE);
    EXPECT_FALSE(pCmdQ->isQueueBlocked());

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA],
             static_cast<cl_mem>(buffer.get()));
    EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer.get()));
    EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData));
}
503
// Maps a 1D image (non-blocking) and checks that exactly one of the MAP_IMAGE
// "doesn't require copy" / "requires copy" hints is reported, selected by the test parameter.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapImageIsCallingThenContextProvidesProperHint) {
    const bool isZeroCopyImage = GetParam();

    size_t origin[] = {0, 0, 0};
    size_t region[] = {1, 1, 1};

    // Owned through unique_ptr so the image is released on every exit path,
    // including the early return taken by a failed ASSERT.
    std::unique_ptr<Image> image;
    if (isZeroCopyImage) {
        image.reset(ImageHelper<ImageReadOnly<Image1dDefaults>>::create(context));
    } else {
        image.reset(ImageHelper<ImageUseHostPtr<Image1dDefaults>>::create(context));
    }
    ASSERT_NE(nullptr, image.get());
    EXPECT_EQ(isZeroCopyImage, image->isMemObjZeroCopy());

    pCmdQ->enqueueMapImage(image.get(), CL_FALSE, 0, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA],
             static_cast<cl_mem>(image.get()));
    EXPECT_EQ(isZeroCopyImage, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(image.get()));
    EXPECT_EQ(!isZeroCopyImage, containsHint(expectedHint, userData));
}
541
// Maps an image behind a user event, completes the event, unmaps, and then checks that
// exactly one of the MAP_IMAGE hints was reported, selected by the boolean test parameter.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagAndBlockingEventWhenEnqueueMapImageIsCallingThenContextProvidesProperHint) {
    // A read-only image is always created first; it is replaced by a use-host-ptr image
    // when the non-zero-copy case is requested.
    std::unique_ptr<Image> mappedImage(ImageHelper<ImageReadOnly<Image1dDefaults>>::create(context));
    const bool expectZeroCopy = GetParam();

    size_t mapOrigin[] = {0, 0, 0};
    size_t mapRegion[] = {1, 1, 1};

    if (!expectZeroCopy) {
        mappedImage.reset(ImageHelper<ImageUseHostPtr<Image1dDefaults>>::create(context));
    }
    EXPECT_EQ(expectZeroCopy, mappedImage->isMemObjZeroCopy());

    UserEvent gateEvent(context);
    cl_event waitList = &gateEvent;
    void *mappedPtr = pCmdQ->enqueueMapImage(mappedImage.get(), CL_FALSE, 0, mapOrigin, mapRegion,
                                             nullptr, nullptr, 1, &waitList, nullptr, retVal);
    EXPECT_TRUE(pCmdQ->isQueueBlocked());

    gateEvent.setStatus(CL_COMPLETE);
    pCmdQ->enqueueUnmapMemObject(mappedImage.get(), mappedPtr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA],
             static_cast<cl_mem>(mappedImage.get()));
    EXPECT_EQ(expectZeroCopy, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(mappedImage.get()));
    EXPECT_EQ(!expectZeroCopy, containsHint(expectedHint, userData));
}
579
// Maps and then unmaps a CL_MEM_USE_HOST_PTR buffer and checks that exactly one of the
// UNMAP_MEM_OBJ "requires copy" / "doesn't require copy" hints is reported, selected by the
// boolean test parameter.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCallingWithBufferThenContextProvidesProperHint) {
    const bool zeroCopyBuffer = GetParam();
    const auto cacheLine = MemoryConstants::cacheLineSize;
    // The non-zero-copy case uses a size one byte past a cache line
    // (the test relies on this making the buffer non-zero-copy).
    size_t sizeForBuffer = cacheLine;
    if (!zeroCopyBuffer) {
        sizeForBuffer++;
    }

    // hostMemory is declared before the buffer so that it is released after the buffer that
    // wraps it has been destroyed, including on the early return taken by a failed ASSERT.
    auto releaseAligned = [](void *memory) { alignedFree(memory); };
    std::unique_ptr<void, decltype(releaseAligned)> hostMemory(alignedMalloc(2 * cacheLine, cacheLine), releaseAligned);
    auto buffer = std::unique_ptr<Buffer>(Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, hostMemory.get(), retVal));
    ASSERT_NE(nullptr, buffer.get());

    void *mapPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_FALSE, 0, 0, cacheLine, 0, nullptr, nullptr, retVal);
    pCmdQ->enqueueUnmapMemObject(buffer.get(), mapPtr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA],
             mapPtr, static_cast<cl_mem>(buffer.get()));
    EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA],
             mapPtr);
    EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData));
}
605
// Map and unmap are both enqueued while the queue is blocked on a user event; once the event
// is set to CL_COMPLETE the queue must be unblocked and the expected UNMAP_MEM_OBJ hint
// (selected by the boolean test parameter) must have been reported.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyAndBlockedEventFlagWhenEnqueueUnmapIsCallingWithBufferThenContextProvidesProperHint) {
    const bool zeroCopyBuffer = GetParam();
    const auto cacheLine = MemoryConstants::cacheLineSize;
    UserEvent userEvent(context);
    cl_event blockedEvent = &userEvent;
    size_t sizeForBuffer = cacheLine;
    if (!zeroCopyBuffer) {
        sizeForBuffer++;
    }

    // hostMemory is declared before the buffer so that it is released after the buffer that
    // wraps it has been destroyed, including on the early return taken by a failed ASSERT.
    auto releaseAligned = [](void *memory) { alignedFree(memory); };
    std::unique_ptr<void, decltype(releaseAligned)> hostMemory(alignedMalloc(2 * cacheLine, cacheLine), releaseAligned);
    auto buffer = std::unique_ptr<Buffer>(Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, hostMemory.get(), retVal));
    ASSERT_NE(nullptr, buffer.get());
    EXPECT_EQ(buffer->isMemObjZeroCopy(), zeroCopyBuffer);

    void *mapPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_FALSE, 0, 0, cacheLine, 1, &blockedEvent, nullptr, retVal);
    EXPECT_TRUE(pCmdQ->isQueueBlocked());

    // The unmap is enqueued before the event completes.
    pCmdQ->enqueueUnmapMemObject(buffer.get(), mapPtr, 0, nullptr, nullptr);
    userEvent.setStatus(CL_COMPLETE);
    EXPECT_FALSE(pCmdQ->isQueueBlocked());

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA],
             mapPtr, static_cast<cl_mem>(buffer.get()));
    EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA],
             mapPtr);
    EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData));
}
636
// Maps and then unmaps a 1D image and checks that exactly one of the UNMAP_MEM_OBJ
// "requires copy" / "doesn't require copy" hints is reported, selected by the test parameter.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCallingWithImageThenContextProvidesProperHint) {
    const bool isZeroCopyImage = GetParam();

    size_t origin[] = {0, 0, 0};
    size_t region[] = {1, 1, 1};

    // Owned through unique_ptr so the image is released on every exit path,
    // including the early return taken by a failed ASSERT.
    std::unique_ptr<Image> image;
    if (isZeroCopyImage) {
        image.reset(ImageHelper<ImageReadOnly<Image1dDefaults>>::create(context));
    } else {
        image.reset(ImageHelper<ImageUseHostPtr<Image1dDefaults>>::create(context));
    }
    ASSERT_NE(nullptr, image.get());
    EXPECT_EQ(isZeroCopyImage, image->isMemObjZeroCopy());

    void *mapPtr = pCmdQ->enqueueMapImage(image.get(), CL_FALSE, 0, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal);

    pCmdQ->enqueueUnmapMemObject(image.get(), mapPtr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA],
             mapPtr, static_cast<cl_mem>(image.get()));
    EXPECT_EQ(!isZeroCopyImage, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA],
             mapPtr);
    EXPECT_EQ(isZeroCopyImage, containsHint(expectedHint, userData));
}
666
// Maps a 256-byte SVM allocation (non-blocking) and expects the
// SVM_MAP_DOESNT_REQUIRE_COPY_DATA hint formatted with the SVM pointer.
TEST_F(PerformanceHintEnqueueTest, GivenSVMPointerWhenEnqueueSVMMapIsCallingThenContextProvidesProperHint) {
    REQUIRE_SVM_OR_SKIP(pPlatform->getClDevice(0));

    void *svmAllocation = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());

    pCmdQ->enqueueSVMMap(CL_FALSE, 0, svmAllocation, 256, 0, nullptr, nullptr, false);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_SVM_MAP_DOESNT_REQUIRE_COPY_DATA],
             svmAllocation);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    context->getSVMAllocsManager()->freeSVMAlloc(svmAllocation);
}
678
// Enqueues the fixture kernel with a null local work size (debug flags untouched) and expects
// the NULL_LOCAL_WORKGROUP_SIZE hint formatted with the kernel name and the three local work
// size values the kernel reports afterwards.
TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeNDIsDefaultWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {
    retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto chosenLws = kernel->getLocalWorkSizeValues();
    const auto &kernelName = kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName;

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
             kernelName.c_str(), *chosenLws[0], *chosenLws[1], *chosenLws[2]);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
690
// Enqueues the fixture kernel with a null local work size while EnableComputeWorkSizeND is
// forced to true, and expects the NULL_LOCAL_WORKGROUP_SIZE hint formatted with the kernel
// name and the three local work size values the kernel reports afterwards.
TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeNDIsTrueWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {
    // Scoped restore instead of a manual save/set-back pair, so the global flag cannot stay
    // modified if the test body is ever left early.
    // NOTE(review): relies on DebugManagerStateRestore (from the already-included
    // debug_manager_state_restore.h) restoring the debug flags in its destructor - confirm.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableComputeWorkSizeND.set(true);

    retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto localWorkSize = kernel->getLocalWorkSizeValues();
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
             kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(),
             *localWorkSize[0], *localWorkSize[1], *localWorkSize[2]);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
705
// Enqueues the fixture kernel with a null local work size while
// EnableComputeWorkSizeND is forced to false, and checks that the
// NULL_LOCAL_WORKGROUP_SIZE hint is present in userData.
TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeNDIsFalseWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {

    // Scoped guard instead of a hand-written get()/set() pair, matching the
    // EnableComputeWorkSizeSquared tests in this file; it is expected to put
    // the debug flags back when the test body ends.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeND.set(false);
    retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Expected hint is formatted from the kernel name and the local work size
    // values the kernel reports after the enqueue.
    auto localWorkSize = kernel->getLocalWorkSizeValues();
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
             kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(),
             *localWorkSize[0], *localWorkSize[1], *localWorkSize[2]);

    EXPECT_TRUE(containsHint(expectedHint, userData));
}
721
// Enqueues the fixture kernel with a null local work size, leaving the
// EnableComputeWorkSizeSquared debug flag untouched, and checks that the
// NULL_LOCAL_WORKGROUP_SIZE hint is present in userData.
TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeSquaredIsDefaultWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {
    retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Format the expected hint from the kernel name and the local work size
    // values reported by the kernel once it has been enqueued.
    const auto &hintKernelName = kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName;
    auto reportedLws = kernel->getLocalWorkSizeValues();
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
             hintKernelName.c_str(),
             *reportedLws[0], *reportedLws[1], *reportedLws[2]);

    EXPECT_TRUE(containsHint(expectedHint, userData));
}
734
// Enqueues the fixture kernel with a null local work size while
// EnableComputeWorkSizeSquared is true and EnableComputeWorkSizeND is false,
// and checks that the NULL_LOCAL_WORKGROUP_SIZE hint is present in userData.
TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeSquaredIsTrueWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableComputeWorkSizeSquared.set(true);
    DebugManager.flags.EnableComputeWorkSizeND.set(false);

    retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Format the expected hint from the kernel name and the local work size
    // values reported by the kernel once it has been enqueued.
    const auto &hintKernelName = kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName;
    auto reportedLws = kernel->getLocalWorkSizeValues();
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
             hintKernelName.c_str(),
             *reportedLws[0], *reportedLws[1], *reportedLws[2]);

    EXPECT_TRUE(containsHint(expectedHint, userData));
}
750
// Enqueues the fixture kernel with a null local work size while both
// EnableComputeWorkSizeSquared and EnableComputeWorkSizeND are false, and
// checks that the NULL_LOCAL_WORKGROUP_SIZE hint is present in userData.
TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeSquaredIsFalseWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableComputeWorkSizeSquared.set(false);
    DebugManager.flags.EnableComputeWorkSizeND.set(false);

    retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Format the expected hint from the kernel name and the local work size
    // values reported by the kernel once it has been enqueued.
    const auto &hintKernelName = kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName;
    auto reportedLws = kernel->getLocalWorkSizeValues();
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
             hintKernelName.c_str(),
             *reportedLws[0], *reportedLws[1], *reportedLws[2]);

    EXPECT_TRUE(containsHint(expectedHint, userData));
}
766
// Computes a local work-group size via computeWorkgroupSize(), halves one
// dimension of it, enqueues the kernel with the altered size, and checks that
// the BAD_LOCAL_WORKGROUP_SIZE hint (altered vs. computed sizes) is in userData.
TEST_P(PerformanceHintEnqueueKernelBadSizeTest, GivenBadLocalWorkGroupSizeWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {
    // Work dimension derived from the global size: a size of 1 in y gives 1,
    // otherwise a size of 1 in z gives 2, otherwise 3.
    uint32_t workDim = 3;
    if (globalWorkGroupSize[1] == 1) {
        workDim = 1;
    } else if (globalWorkGroupSize[2] == 1) {
        workDim = 2;
    }

    DispatchInfo dispatchInfo(&pCmdQ->getClDevice(), kernel, workDim, Vec3<size_t>(globalWorkGroupSize), Vec3<size_t>(0u, 0u, 0u), Vec3<size_t>(0u, 0u, 0u));
    auto computedLocalWorkgroupSize = computeWorkgroupSize(dispatchInfo);

    size_t localWorkGroupSize[3];
    localWorkGroupSize[0] = computedLocalWorkgroupSize.x;
    localWorkGroupSize[1] = computedLocalWorkgroupSize.y;
    localWorkGroupSize[2] = computedLocalWorkgroupSize.z;

    // Halve the parameterized dimension when it is larger than 1; otherwise
    // halve dimension 0 instead.
    const int badSizeDimension = GetParam();
    size_t &sizeToHalve = (localWorkGroupSize[badSizeDimension] > 1) ? localWorkGroupSize[badSizeDimension]
                                                                     : localWorkGroupSize[0];
    sizeToHalve /= 2;

    retVal = pCmdQ->enqueueKernel(kernel, 3, nullptr, globalWorkGroupSize, localWorkGroupSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    const auto &hintKernelName = kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName;
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[BAD_LOCAL_WORKGROUP_SIZE],
             localWorkGroupSize[0], localWorkGroupSize[1], localWorkGroupSize[2],
             hintKernelName.c_str(),
             computedLocalWorkgroupSize.x, computedLocalWorkgroupSize.y, computedLocalWorkgroupSize.z);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
796
// Enqueues the fixture kernel with work dimension 3 and a precomputed local
// work-group size, then checks that the PRINTF_DETECTED_IN_KERNEL hint naming
// the kernel is present in userData.
TEST_F(PerformanceHintEnqueueKernelPrintfTest, GivenKernelWithPrintfWhenEnqueueKernelIsCalledWithWorkDim3ThenContextProvidesProperHint) {
    size_t preferredWorkGroupSize[3];
    auto deviceMaxWorkGroupSize = static_cast<uint32_t>(pPlatform->getClDevice(0)->getSharedDeviceInfo().maxWorkGroupSize);

    // Choose the helper that fills preferredWorkGroupSize based on the
    // EnableComputeWorkSizeND debug flag.
    if (!DebugManager.flags.EnableComputeWorkSizeND.get()) {
        computeWorkgroupSize2D(deviceMaxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, 32);
    } else {
        WorkSizeInfo wsInfo(deviceMaxWorkGroupSize, 0u, 32u, 0u, ::defaultHwInfo.get(), 32u, 0u, false, false);
        computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, 2);
    }

    retVal = pCmdQ->enqueueKernel(kernel, 3, nullptr, globalWorkGroupSize, preferredWorkGroupSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    const auto &hintKernelName = kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName;
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[PRINTF_DETECTED_IN_KERNEL],
             hintKernelName.c_str());
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
811
// Binds a MockBuffer whose graphics allocation is marked coherent as the only
// argument of a mock kernel, enqueues that kernel with work dimension 2, and
// checks that the KERNEL_REQUIRES_COHERENCY hint is present in userData.
TEST_F(PerformanceHintEnqueueTest, GivenKernelWithCoherentPtrWhenEnqueueKernelIsCalledWithWorkDim2ThenContextProvidesProperHint) {
    size_t preferredWorkGroupSize[3];
    size_t globalWorkGroupSize[3] = {1, 1, 1};
    auto deviceMaxWorkGroupSize = static_cast<uint32_t>(pPlatform->getClDevice(0)->getSharedDeviceInfo().maxWorkGroupSize);
    MockKernelWithInternals mockKernel(*pPlatform->getClDevice(0), context);

    // Choose the helper that fills preferredWorkGroupSize based on the
    // EnableComputeWorkSizeND debug flag.
    if (!DebugManager.flags.EnableComputeWorkSizeND.get()) {
        computeWorkgroupSize2D(deviceMaxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, 32);
    } else {
        WorkSizeInfo wsInfo(deviceMaxWorkGroupSize, 0u, 32u, 0u, ::defaultHwInfo.get(), 32u, 0u, false, false);
        computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, 2);
    }

    // The buffer is owned by this test and released at the end of the body.
    auto coherentBuffer = new MockBuffer();
    coherentBuffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true);

    // Describe the buffer as the kernel's single argument.
    Kernel::SimpleKernelArgInfo bufferArgInfo;
    bufferArgInfo.object = static_cast<cl_mem>(coherentBuffer);
    bufferArgInfo.type = Kernel::kernelArgType::BUFFER_OBJ;

    std::vector<Kernel::SimpleKernelArgInfo> kernelArguments;
    kernelArguments.push_back(bufferArgInfo);
    mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1);
    mockKernel.mockKernel->setKernelArguments(kernelArguments);

    retVal = pCmdQ->enqueueKernel(mockKernel.mockKernel, 2, nullptr, globalWorkGroupSize, preferredWorkGroupSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    const auto &hintKernelName = mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName;
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[KERNEL_REQUIRES_COHERENCY],
             hintKernelName.c_str());
    EXPECT_TRUE(containsHint(expectedHint, userData));

    delete coherentBuffer;
}
846
// Dimension indices passed as the parameter of
// PerformanceHintEnqueueKernelBadSizeTest, which uses GetParam() to index its
// three-element local work-group size array.
const int validDimensions[] = {0, 1, 2};

// Runs PerformanceHintEnqueueReadBufferTest for every combination of two
// boolean parameters.
INSTANTIATE_TEST_CASE_P(
    DriverDiagnosticsTests,
    PerformanceHintEnqueueReadBufferTest,
    testing::Combine(
        ::testing::Bool(),
        ::testing::Bool()));

// Runs PerformanceHintEnqueueReadImageTest for every combination of two
// boolean parameters.
INSTANTIATE_TEST_CASE_P(
    DriverDiagnosticsTests,
    PerformanceHintEnqueueReadImageTest,
    testing::Combine(
        ::testing::Bool(),
        ::testing::Bool()));

// Runs PerformanceHintEnqueueMapTest once per boolean value.
INSTANTIATE_TEST_CASE_P(
    DriverDiagnosticsTests,
    PerformanceHintEnqueueMapTest,
    testing::Bool());

// Runs PerformanceHintEnqueueKernelBadSizeTest once per entry of
// validDimensions.
INSTANTIATE_TEST_CASE_P(
    DriverDiagnosticsTests,
    PerformanceHintEnqueueKernelBadSizeTest,
    testing::ValuesIn(validDimensions));
872