1 /*
2 * Copyright (C) 2020-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8 #include "shared/source/command_container/implicit_scaling.h"
9 #include "shared/source/command_stream/command_stream_receiver_hw.h"
10 #include "shared/source/command_stream/preemption.h"
11 #include "shared/source/utilities/software_tags_manager.h"
12 #include "shared/test/common/cmd_parse/gen_cmd_parse.h"
13 #include "shared/test/common/helpers/unit_test_helper.h"
14 #include "shared/test/common/mocks/ult_device_factory.h"
15 #include "shared/test/common/test_macros/test.h"
16
17 #include "level_zero/core/source/fence/fence.h"
18 #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
19 #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
20 #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
21 #include "level_zero/core/test/unit_tests/mocks/mock_fence.h"
22 #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
23 #include "level_zero/core/test/unit_tests/mocks/mock_module.h"
24
25 namespace L0 {
26 namespace ult {
27
28 struct CommandQueueExecuteCommandLists : public Test<DeviceFixture> {
SetUpL0::ult::CommandQueueExecuteCommandLists29 void SetUp() override {
30 DeviceFixture::SetUp();
31
32 ze_result_t returnValue;
33 commandLists[0] = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle();
34 ASSERT_NE(nullptr, commandLists[0]);
35 EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
36
37 commandLists[1] = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle();
38 ASSERT_NE(nullptr, commandLists[1]);
39 EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
40 }
41
TearDownL0::ult::CommandQueueExecuteCommandLists42 void TearDown() override {
43 for (auto i = 0u; i < numCommandLists; i++) {
44 auto commandList = CommandList::fromHandle(commandLists[i]);
45 commandList->destroy();
46 }
47
48 DeviceFixture::TearDown();
49 }
50
51 template <typename FamilyType>
52 void twoCommandListCommandPreemptionTest(bool preemptionCmdProgramming);
53
54 const static uint32_t numCommandLists = 2;
55 ze_command_list_handle_t commandLists[numCommandLists];
56 };
57
58 struct MultiDeviceCommandQueueExecuteCommandLists : public Test<MultiDeviceFixture> {
SetUpL0::ult::MultiDeviceCommandQueueExecuteCommandLists59 void SetUp() override {
60 DebugManager.flags.EnableWalkerPartition.set(1);
61 numRootDevices = 1u;
62 MultiDeviceFixture::SetUp();
63
64 uint32_t deviceCount = 1;
65 ze_device_handle_t deviceHandle;
66 driverHandle->getDevice(&deviceCount, &deviceHandle);
67 device = Device::fromHandle(deviceHandle);
68 ASSERT_NE(nullptr, device);
69
70 ze_result_t returnValue;
71 commandLists[0] = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle();
72 ASSERT_NE(nullptr, commandLists[0]);
73 EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
74
75 commandLists[1] = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle();
76 ASSERT_NE(nullptr, commandLists[1]);
77 EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
78 }
79
TearDownL0::ult::MultiDeviceCommandQueueExecuteCommandLists80 void TearDown() override {
81 for (auto i = 0u; i < numCommandLists; i++) {
82 auto commandList = CommandList::fromHandle(commandLists[i]);
83 commandList->destroy();
84 }
85
86 MultiDeviceFixture::TearDown();
87 }
88
89 L0::Device *device = nullptr;
90 const static uint32_t numCommandLists = 2;
91 ze_command_list_handle_t commandLists[numCommandLists];
92 };
93
// Marks both fixture command lists as requiring uncached MOCS on the queue and
// verifies that executing them succeeds.
HWTEST_F(CommandQueueExecuteCommandLists, whenACommandListExecutedRequiresUncachedMOCSThenSuccessisReturned) {
    const ze_command_queue_desc_t desc{};
    ze_result_t returnValue;
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
                                                           device,
                                                           neoDevice->getDefaultEngine().commandStreamReceiver,
                                                           &desc,
                                                           false,
                                                           false,
                                                           returnValue));
    // Check the queue itself before reaching into it.
    ASSERT_NE(nullptr, commandQueue);
    ASSERT_NE(nullptr, commandQueue->commandStream);

    for (auto handle : commandLists) {
        whitebox_cast(CommandList::fromHandle(handle))->requiresQueueUncachedMocs = true;
    }

    auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    commandQueue->destroy();
}
118
// Executes the fixture command lists and walks the queue's command stream:
// each command list must be reached through a second-level, PPGTT
// MI_BATCH_BUFFER_START whose address is the GPU address of that list's first
// command buffer allocation, and an MI_BATCH_BUFFER_END must follow.
HWTEST_F(CommandQueueExecuteCommandLists, whenASecondLevelBatchBufferPerCommandListAddedThenProperSizeExpected) {
    using PARSE = typename FamilyType::PARSE;
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

    ze_result_t createResult;
    const ze_command_queue_desc_t queueDesc{};
    auto queue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver,
                                                    &queueDesc, false, false, createResult));
    ASSERT_NE(nullptr, queue->commandStream);

    auto bytesUsedBefore = queue->commandStream->getUsed();

    auto executeResult = queue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
    ASSERT_EQ(ZE_RESULT_SUCCESS, executeResult);

    auto bytesUsedAfter = queue->commandStream->getUsed();
    ASSERT_GT(bytesUsedAfter, bytesUsedBefore);

    GenCmdList parsedCommands;
    auto streamStart = ptrOffset(queue->commandStream->getCpuBase(), 0);
    ASSERT_TRUE(PARSE::parseCommandBuffer(parsedCommands, streamStart, bytesUsedAfter));

    auto cursor = parsedCommands.begin();
    for (auto handle : commandLists) {
        auto firstCmdBuffer = CommandList::fromHandle(handle)->commandContainer.getCmdBufferAllocations()[0];

        cursor = find<MI_BATCH_BUFFER_START *>(cursor, parsedCommands.end());
        ASSERT_NE(parsedCommands.end(), cursor);

        auto batchBufferStart = genCmdCast<MI_BATCH_BUFFER_START *>(*cursor);
        // Step past this command so the next search finds the following one.
        ++cursor;
        ASSERT_NE(nullptr, batchBufferStart);
        EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH,
                  batchBufferStart->getSecondLevelBatchBuffer());
        EXPECT_EQ(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT,
                  batchBufferStart->getAddressSpaceIndicator());
        EXPECT_EQ(firstCmdBuffer->getGpuAddress(), batchBufferStart->getBatchBufferStartAddress());
    }

    auto batchBufferEnd = find<MI_BATCH_BUFFER_END *>(cursor, parsedCommands.end());
    EXPECT_NE(parsedCommands.end(), batchBufferEnd);

    queue->destroy();
}
170
// Executes the fixture command lists with a fence on an asynchronous queue and
// verifies that the third-to-last PIPE_CONTROL in the queue's command stream
// writes Fence::STATE_SIGNALED as immediate data to the fence's GPU address.
HWTEST2_F(CommandQueueExecuteCommandLists, whenUsingFenceThenExpectEndingPipeControlUpdatingFenceAllocation, IsGen9) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;
    using PARSE = typename FamilyType::PARSE;

    ze_command_queue_desc_t desc{};
    ze_result_t returnValue;
    desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
                                                           device,
                                                           neoDevice->getDefaultEngine().commandStreamReceiver,
                                                           &desc,
                                                           false,
                                                           false,
                                                           returnValue));
    ASSERT_NE(nullptr, commandQueue->commandStream);

    ze_fence_desc_t fenceDesc{};
    auto fence = whitebox_cast(Fence::create(commandQueue, &fenceDesc));
    ASSERT_NE(nullptr, fence);

    ze_fence_handle_t fenceHandle = fence->toHandle();

    auto usedSpaceBefore = commandQueue->commandStream->getUsed();

    auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true);

    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto usedSpaceAfter = commandQueue->commandStream->getUsed();
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

    GenCmdList cmdList;
    ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList,
                                          ptrOffset(commandQueue->commandStream->getCpuBase(), 0),
                                          usedSpaceAfter));

    // On some platforms the fence update requires more than a single
    // PIPE_CONTROL; on SKL the fence tag update is expected in the
    // third-to-last PIPE_CONTROL.
    auto pipeControls = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    // At least three PIPE_CONTROLs are required, otherwise size() - 3 wraps
    // around and indexes out of bounds.
    ASSERT_LE(3u, pipeControls.size());
    PIPE_CONTROL *fenceUpdate = genCmdCast<PIPE_CONTROL *>(*pipeControls[pipeControls.size() - 3]);

    EXPECT_EQ(fence->getGpuAddress(), NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*fenceUpdate));

    EXPECT_EQ(POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, fenceUpdate->getPostSyncOperation());

    uint64_t fenceImmData = Fence::STATE_SIGNALED;
    EXPECT_EQ(fenceImmData, fenceUpdate->getImmediateData());

    fence->destroy();
    commandQueue->destroy();
}
224
// Executes the fixture command lists and verifies that the last PIPE_CONTROL
// in the queue's command stream is a write-immediate-data post-sync operation
// carrying the default engine CSR's current task count.
HWTEST_F(CommandQueueExecuteCommandLists, whenExecutingCommandListsThenEndingPipeControlCommandIsExpected) {
    using PARSE = typename FamilyType::PARSE;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;

    ze_result_t createResult;
    const ze_command_queue_desc_t queueDesc{};
    auto queue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver,
                                                    &queueDesc, false, false, createResult));
    ASSERT_NE(nullptr, queue->commandStream);

    auto bytesUsedBefore = queue->commandStream->getUsed();

    auto executeResult = queue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
    ASSERT_EQ(ZE_RESULT_SUCCESS, executeResult);

    auto bytesUsedAfter = queue->commandStream->getUsed();
    ASSERT_GT(bytesUsedAfter, bytesUsedBefore);

    GenCmdList parsedCommands;
    auto streamStart = ptrOffset(queue->commandStream->getCpuBase(), 0);
    ASSERT_TRUE(PARSE::parseCommandBuffer(parsedCommands, streamStart, bytesUsedAfter));

    // The post-sync PIPE_CONTROL is expected to be the final one in the stream,
    // so at least one must have been programmed.
    auto allPipeControls = findAll<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_LE(1u, allPipeControls.size());
    auto lastPipeControlIndex = allPipeControls.size() - 1;
    PIPE_CONTROL *endingPipeControl = genCmdCast<PIPE_CONTROL *>(*allPipeControls[lastPipeControlIndex]);

    EXPECT_EQ(POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, endingPipeControl->getPostSyncOperation());

    uint64_t expectedTaskCount = neoDevice->getDefaultEngine().commandStreamReceiver->peekTaskCount();
    EXPECT_EQ(expectedTaskCount, endingPipeControl->getImmediateData());

    queue->destroy();
}
267
268 using CommandQueueExecuteSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
// Executes two command lists with different per-thread scratch sizes and checks
// that the CSR's scratch space controller reports the larger of the two, with
// one MEDIA_VFE_STATE / STATE_BASE_ADDRESS pair in the stream. After resetting
// the lists and raising the larger size, a second execution must report the new
// maximum and the whole stream must contain two of each command.
HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandQueueHaving2CommandListsThenMVSIsProgrammedWithMaxPTSS, CommandQueueExecuteSupport) {
    using PARSE = typename FamilyType::PARSE;
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;

    ze_result_t createResult;
    ze_command_queue_desc_t queueDesc = {};
    auto queue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver,
                                                    &queueDesc, false, false, createResult));

    // First submission: scratch sizes 512 and 1024.
    CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(512u);
    CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(1024u);

    ASSERT_NE(nullptr, queue->commandStream);
    auto bytesUsedBefore = queue->commandStream->getUsed();

    auto executeResult = queue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
    ASSERT_EQ(ZE_RESULT_SUCCESS, executeResult);
    EXPECT_EQ(1024u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());

    auto bytesUsedAfter = queue->commandStream->getUsed();
    ASSERT_GT(bytesUsedAfter, bytesUsedBefore);

    GenCmdList firstParse;
    ASSERT_TRUE(PARSE::parseCommandBuffer(firstParse, ptrOffset(queue->commandStream->getCpuBase(), 0), bytesUsedAfter));

    auto vfeStates = findAll<MEDIA_VFE_STATE *>(firstParse.begin(), firstParse.end());
    auto baseAddressStates = findAll<STATE_BASE_ADDRESS *>(firstParse.begin(), firstParse.end());
    // Exactly one of each state command after the first submission.
    ASSERT_EQ(1u, vfeStates.size());
    ASSERT_EQ(1u, baseAddressStates.size());

    // Second submission: reset both lists, then use scratch sizes 2048 and 1024.
    CommandList::fromHandle(commandLists[0])->reset();
    CommandList::fromHandle(commandLists[1])->reset();
    CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(2048u);
    CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(1024u);

    ASSERT_NE(nullptr, queue->commandStream);
    bytesUsedBefore = queue->commandStream->getUsed();

    executeResult = queue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
    ASSERT_EQ(ZE_RESULT_SUCCESS, executeResult);
    EXPECT_EQ(2048u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());

    bytesUsedAfter = queue->commandStream->getUsed();
    ASSERT_GT(bytesUsedAfter, bytesUsedBefore);

    // Parsed from the start of the stream, so both submissions are included.
    GenCmdList secondParse;
    ASSERT_TRUE(PARSE::parseCommandBuffer(secondParse, ptrOffset(queue->commandStream->getCpuBase(), 0), bytesUsedAfter));

    vfeStates = findAll<MEDIA_VFE_STATE *>(secondParse.begin(), secondParse.end());
    baseAddressStates = findAll<STATE_BASE_ADDRESS *>(secondParse.begin(), secondParse.end());
    // Two of each state command across both submissions.
    ASSERT_EQ(2u, vfeStates.size());
    ASSERT_EQ(2u, baseAddressStates.size());

    queue->destroy();
}
335
// For both an internal-usage and a regular queue: executes the fixture command
// lists and checks that STATE_SIP is present in the queue's command stream and
// points at the SIP kernel allocation when the device preemption mode is
// MidThread, and absent otherwise.
HWTEST_F(CommandQueueExecuteCommandLists, givenMidThreadPreemptionWhenCommandsAreExecutedThenStateSipIsAdded) {
    using STATE_SIP = typename FamilyType::STATE_SIP;
    using PARSE = typename FamilyType::PARSE;

    ze_command_queue_desc_t desc{};
    desc.ordinal = 0u;
    desc.index = 0u;
    desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;

    std::array<bool, 2> testedInternalFlags = {true, false};

    for (auto flagInternal : testedInternalFlags) {
        ze_result_t returnValue;
        auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
                                                               device,
                                                               neoDevice->getDefaultEngine().commandStreamReceiver,
                                                               &desc,
                                                               false,
                                                               flagInternal,
                                                               returnValue));
        EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

        ASSERT_NE(nullptr, commandQueue->commandStream);

        auto usedSpaceBefore = commandQueue->commandStream->getUsed();

        auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
        ASSERT_EQ(ZE_RESULT_SUCCESS, result);

        auto usedSpaceAfter = commandQueue->commandStream->getUsed();
        ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

        GenCmdList cmdList;
        ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter));

        auto itorSip = find<STATE_SIP *>(cmdList.begin(), cmdList.end());

        auto preemptionMode = neoDevice->getPreemptionMode();
        if (preemptionMode == NEO::PreemptionMode::MidThread) {
            // Fatal check: the iterator is dereferenced right below, and
            // dereferencing end() would be undefined behavior.
            ASSERT_NE(cmdList.end(), itorSip);

            auto sipAllocation = SipKernel::getSipKernel(*neoDevice).getSipAllocation();
            STATE_SIP *stateSipCmd = static_cast<STATE_SIP *>(*itorSip);
            EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), stateSipCmd->getSystemInstructionPointer());
        } else {
            EXPECT_EQ(cmdList.end(), itorSip);
        }
        commandQueue->destroy();
    }
}
386
// For both an internal-usage and a regular queue: executes the fixture command
// lists twice with a synchronize after each submission. The first submission
// must program STATE_SIP when the device preemption mode is MidThread (and not
// otherwise); the commands added by the second submission must contain neither
// STATE_SIP nor any MMIO write to register 0x2580.
HWTEST2_F(CommandQueueExecuteCommandLists, givenMidThreadPreemptionWhenCommandsAreExecutedTwoTimesThenStateSipIsAddedOnlyTheFirstTime, IsAtLeastSkl) {
    using STATE_SIP = typename FamilyType::STATE_SIP;
    using PARSE = typename FamilyType::PARSE;

    ze_command_queue_desc_t desc{};
    desc.ordinal = 0u;
    desc.index = 0u;
    desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;

    std::array<bool, 2> testedInternalFlags = {true, false};

    for (auto flagInternal : testedInternalFlags) {
        ze_result_t returnValue;
        auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
                                                               device,
                                                               neoDevice->getDefaultEngine().commandStreamReceiver,
                                                               &desc,
                                                               false,
                                                               flagInternal,
                                                               returnValue));
        EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

        ASSERT_NE(nullptr, commandQueue->commandStream);

        auto usedSpaceBefore = commandQueue->commandStream->getUsed();

        auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
        ASSERT_EQ(ZE_RESULT_SUCCESS, result);

        result = commandQueue->synchronize(0);
        ASSERT_EQ(ZE_RESULT_SUCCESS, result);

        auto usedSpaceAfter = commandQueue->commandStream->getUsed();
        ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

        GenCmdList cmdList;
        ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter));

        auto itorSip = find<STATE_SIP *>(cmdList.begin(), cmdList.end());

        auto preemptionMode = neoDevice->getPreemptionMode();
        if (preemptionMode == NEO::PreemptionMode::MidThread) {
            // Fatal check: the iterator is dereferenced right below, and
            // dereferencing end() would be undefined behavior.
            ASSERT_NE(cmdList.end(), itorSip);

            auto sipAllocation = SipKernel::getSipKernel(*neoDevice).getSipAllocation();
            STATE_SIP *stateSipCmd = static_cast<STATE_SIP *>(*itorSip);
            EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), stateSipCmd->getSystemInstructionPointer());
        } else {
            EXPECT_EQ(cmdList.end(), itorSip);
        }

        result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
        ASSERT_EQ(ZE_RESULT_SUCCESS, result);

        result = commandQueue->synchronize(0);
        ASSERT_EQ(ZE_RESULT_SUCCESS, result);

        auto usedSpaceAfterSecondExec = commandQueue->commandStream->getUsed();
        ASSERT_GT(usedSpaceAfterSecondExec, usedSpaceAfter);

        // Parse only what the second submission appended: the region starts at
        // offset usedSpaceAfter, so its length is the difference of the two
        // used sizes, not the total used size.
        auto secondExecSize = usedSpaceAfterSecondExec - usedSpaceAfter;
        GenCmdList cmdList2;
        ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList2, ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceAfter), secondExecSize));

        itorSip = find<STATE_SIP *>(cmdList2.begin(), cmdList2.end());
        EXPECT_EQ(cmdList2.end(), itorSip);

        // No preemption reprogramming
        auto secondExecMmioCount = countMmio<FamilyType>(cmdList2.begin(), cmdList2.end(), 0x2580u);
        EXPECT_EQ(0u, secondExecMmioCount);

        commandQueue->destroy();
    }
}
458
// Submits one command list holding a cooperative kernel together with one
// holding a non-cooperative kernel, in both orders. The submission must be
// rejected with ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE by default and must
// succeed once AllowMixingRegularAndCooperativeKernels is set to 1.
HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandListsWithCooperativeAndNonCooperativeKernelsWhenExecuteCommandListsIsCalledThenErrorIsReturned, IsAtLeastSkl) {
    ze_command_queue_desc_t queueDesc = {};
    NEO::CommandStreamReceiver *csr;
    device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);

    auto queue = new MockCommandQueueHw<gfxCoreFamily>{device, csr, &queueDesc};
    queue->initialize(false, false);

    Mock<::L0::Kernel> mockKernel;
    auto mockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
    mockKernel.module = mockModule.get();

    ze_group_count_t groupCount{1, 1, 1};

    // Last argument of appendLaunchKernelWithParams is true for this list...
    auto cooperativeList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    cooperativeList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    cooperativeList->appendLaunchKernelWithParams(&mockKernel, &groupCount, nullptr, false, false, true);

    // ...and false for this one.
    auto regularList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    regularList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    regularList->appendLaunchKernelWithParams(&mockKernel, &groupCount, nullptr, false, false, false);

    // Mixing is rejected regardless of the order of the two lists.
    {
        ze_command_list_handle_t cooperativeFirst[] = {cooperativeList->toHandle(), regularList->toHandle()};
        EXPECT_EQ(ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE, queue->executeCommandLists(2, cooperativeFirst, nullptr, false));
    }
    {
        ze_command_list_handle_t regularFirst[] = {regularList->toHandle(), cooperativeList->toHandle()};
        EXPECT_EQ(ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE, queue->executeCommandLists(2, regularFirst, nullptr, false));
    }

    // With the debug flag set, both orders are accepted.
    DebugManagerStateRestore restorer;
    DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1);
    {
        ze_command_list_handle_t cooperativeFirst[] = {cooperativeList->toHandle(), regularList->toHandle()};
        EXPECT_EQ(ZE_RESULT_SUCCESS, queue->executeCommandLists(2, cooperativeFirst, nullptr, false));
    }
    {
        ze_command_list_handle_t regularFirst[] = {regularList->toHandle(), cooperativeList->toHandle()};
        EXPECT_EQ(ZE_RESULT_SUCCESS, queue->executeCommandLists(2, regularFirst, nullptr, false));
    }
    queue->destroy();
}
509
// Uses a CSR that records each submitted batch buffer's useSingleSubdevice
// flag. A command list with a cooperative kernel must be submitted with the
// flag set, a command list with a non-cooperative kernel with the flag cleared;
// each execution must produce exactly one submission.
HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandListWithCooperativeKernelsWhenExecuteCommandListsIsCalledThenCorrectBatchBufferIsSubmitted, IsAtLeastXeHpCore) {
    // Counts submissions and remembers the most recent useSingleSubdevice value
    // before delegating to the base CommandStreamReceiver implementation.
    struct MockCsr : NEO::CommandStreamReceiverHw<FamilyType> {
        using NEO::CommandStreamReceiverHw<FamilyType>::CommandStreamReceiverHw;
        int submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override {
            ++submitBatchBufferCalled;
            useSingleSubdeviceValue = batchBuffer.useSingleSubdevice;
            return NEO::CommandStreamReceiver::submitBatchBuffer(batchBuffer, allocationsForResidency);
        }
        uint32_t submitBatchBufferCalled = 0;
        bool useSingleSubdeviceValue = false;
    };

    NEO::UltDeviceFactory deviceFactory{1, 4};
    auto rootDevice = deviceFactory.rootDevices[0];

    ze_command_queue_desc_t queueDesc = {};
    MockCsr *recordingCsr = new MockCsr{*rootDevice->getExecutionEnvironment(), rootDevice->getRootDeviceIndex(), rootDevice->getDeviceBitfield()};
    rootDevice->resetCommandStreamReceiver(recordingCsr);

    Mock<L0::DeviceImp> mockDevice{rootDevice, rootDevice->getExecutionEnvironment()};
    auto queue = new MockCommandQueueHw<gfxCoreFamily>{&mockDevice, recordingCsr, &queueDesc};
    queue->initialize(false, false);

    Mock<::L0::Kernel> mockKernel;
    auto mockModule = std::unique_ptr<Module>(new Mock<Module>(&mockDevice, nullptr));
    mockKernel.module = mockModule.get();

    ze_group_count_t groupCount{1, 1, 1};

    // Cooperative launch (last argument true): single-subdevice submission expected.
    auto cooperativeList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    cooperativeList->initialize(&mockDevice, NEO::EngineGroupType::Compute, 0u);
    cooperativeList->appendLaunchKernelWithParams(&mockKernel, &groupCount, nullptr, false, false, true);
    ze_command_list_handle_t cooperativeHandles[] = {cooperativeList->toHandle()};
    auto executeResult = queue->executeCommandLists(1, cooperativeHandles, nullptr, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, executeResult);
    EXPECT_EQ(1u, recordingCsr->submitBatchBufferCalled);
    EXPECT_TRUE(recordingCsr->useSingleSubdeviceValue);

    // Non-cooperative launch (last argument false): flag must be cleared again.
    auto regularList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    regularList->initialize(&mockDevice, NEO::EngineGroupType::Compute, 0u);
    regularList->appendLaunchKernelWithParams(&mockKernel, &groupCount, nullptr, false, false, false);
    ze_command_list_handle_t regularHandles[] = {regularList->toHandle()};
    executeResult = queue->executeCommandLists(1, regularHandles, nullptr, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, executeResult);
    EXPECT_EQ(2u, recordingCsr->submitBatchBufferCalled);
    EXPECT_FALSE(recordingCsr->useSingleSubdeviceValue);

    queue->destroy();
}
558
559 template <typename FamilyType>
twoCommandListCommandPreemptionTest(bool preemptionCmdProgramming)560 void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool preemptionCmdProgramming) {
561 ze_command_queue_desc_t desc = {};
562 desc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
563 desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
564
565 ze_result_t returnValue;
566 auto commandQueue = whitebox_cast(CommandQueue::create(
567 productFamily,
568 device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue));
569 ASSERT_NE(nullptr, commandQueue->commandStream);
570 commandQueue->preemptionCmdSyncProgramming = preemptionCmdProgramming;
571 preemptionCmdProgramming = NEO::PreemptionHelper::getRequiredCmdStreamSize<FamilyType>(NEO::PreemptionMode::ThreadGroup, NEO::PreemptionMode::Disabled) > 0u;
572 auto usedSpaceBefore = commandQueue->commandStream->getUsed();
573
574 auto commandListDisabled = whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
575 commandListDisabled->commandListPreemptionMode = NEO::PreemptionMode::Disabled;
576
577 auto commandListThreadGroup = whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
578 commandListThreadGroup->commandListPreemptionMode = NEO::PreemptionMode::ThreadGroup;
579
580 ze_command_list_handle_t commandLists[] = {commandListDisabled->toHandle(),
581 commandListThreadGroup->toHandle(),
582 commandListDisabled->toHandle()};
583 uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]);
584 auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
585 ASSERT_EQ(ZE_RESULT_SUCCESS, result);
586
587 result = commandQueue->synchronize(0);
588 ASSERT_EQ(ZE_RESULT_SUCCESS, result);
589
590 EXPECT_EQ(NEO::PreemptionMode::Disabled, commandQueue->commandQueuePreemptionMode);
591
592 result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
593 ASSERT_EQ(ZE_RESULT_SUCCESS, result);
594
595 EXPECT_EQ(NEO::PreemptionMode::Disabled, commandQueue->commandQueuePreemptionMode);
596
597 auto usedSpaceAfter = commandQueue->commandStream->getUsed();
598 ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
599
600 GenCmdList cmdList;
601 ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
602 cmdList, commandQueue->commandStream->getCpuBase(), usedSpaceAfter));
603 using STATE_SIP = typename FamilyType::STATE_SIP;
604 using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
605 using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
606 using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
607 using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
608 using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;
609
610 auto preemptionMode = neoDevice->getPreemptionMode();
611 GenCmdList::iterator itor = cmdList.begin();
612
613 GenCmdList::iterator itorStateSip = find<STATE_SIP *>(cmdList.begin(), cmdList.end());
614 if (preemptionMode == NEO::PreemptionMode::MidThread) {
615 EXPECT_NE(itorStateSip, cmdList.end());
616
617 itor = itorStateSip;
618 } else {
619 EXPECT_EQ(itorStateSip, cmdList.end());
620 }
621
622 MI_LOAD_REGISTER_IMM *lriCmd = nullptr;
623 auto itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
624 if (preemptionCmdProgramming) {
625 EXPECT_NE(itorLri, cmdList.end());
626 //Initial cmdQ preemption
627 lriCmd = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
628 EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset());
629
630 itor = itorLri;
631 } else {
632 EXPECT_EQ(itorLri, cmdList.end());
633 }
634
635 uint32_t data = 0;
636 //next should be BB_START to 1st Disabled preemption Cmd List
637 auto itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
638 EXPECT_NE(itorBBStart, cmdList.end());
639 itor = itorBBStart;
640
641 itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
642 if (preemptionCmdProgramming) {
643 EXPECT_NE(itorLri, cmdList.end());
644
645 lriCmd = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
646 EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset());
647 data = (1 << 1) | (((1 << 1) | (1 << 2)) << 16);
648 EXPECT_EQ(data, lriCmd->getDataDword());
649
650 //verify presence of sync PIPE_CONTROL just before LRI switching to thread-group
651 if (commandQueue->preemptionCmdSyncProgramming) {
652 auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
653 EXPECT_NE(itorPipeControl, cmdList.end());
654 }
655
656 itor = itorLri;
657 } else {
658 EXPECT_EQ(itorLri, cmdList.end());
659 }
660
661 //start of thread-group command list
662 itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
663 EXPECT_NE(itorBBStart, cmdList.end());
664 itor = itorBBStart;
665
666 itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
667 if (preemptionCmdProgramming) {
668 EXPECT_NE(itorLri, cmdList.end());
669 lriCmd = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
670 EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset());
671 data = (1 << 2) | (((1 << 1) | (1 << 2)) << 16);
672 EXPECT_EQ(data, lriCmd->getDataDword());
673
674 //verify presence of sync PIPE_CONTROL just before LRI switching to thread-group
675 if (commandQueue->preemptionCmdSyncProgramming) {
676 auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
677 EXPECT_NE(itorPipeControl, cmdList.end());
678 }
679
680 itor = itorLri;
681 } else {
682 EXPECT_EQ(itorLri, cmdList.end());
683 }
684
685 //start of thread-group command list
686 itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
687 EXPECT_NE(itorBBStart, cmdList.end());
688 itor = itorBBStart;
689
690 // BB end
691 auto itorBBEnd = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
692 EXPECT_NE(itorBBStart, cmdList.end());
693
694 auto allStateSips = findAll<STATE_SIP *>(cmdList.begin(), cmdList.end());
695 if (preemptionMode == NEO::PreemptionMode::MidThread) {
696 EXPECT_EQ(1u, allStateSips.size());
697 } else {
698 EXPECT_EQ(0u, allStateSips.size());
699 }
700
701 auto firstExecMmioCount = countMmio<FamilyType>(cmdList.begin(), itorBBEnd, 0x2580u);
702 size_t expectedMmioCount = preemptionCmdProgramming ? 4u : 0u;
703 EXPECT_EQ(expectedMmioCount, firstExecMmioCount);
704
705 // Count next MMIOs for preemption - only two should be present as last cmdlist from 1st exec
706 // and first cmdlist from 2nd exec has the same mode - cmdQ state should remember it
707 auto secondExecMmioCount = countMmio<FamilyType>(itorBBEnd, cmdList.end(), 0x2580u);
708 expectedMmioCount = preemptionCmdProgramming ? 2u : 0u;
709 EXPECT_EQ(expectedMmioCount, secondExecMmioCount);
710
711 commandListDisabled->destroy();
712 commandListThreadGroup->destroy();
713 commandQueue->destroy();
714 }
715
// Runs the shared two-command-list preemption scenario with the helper's boolean argument set to false.
// NOTE(review): judging by the test names, false presumably selects the variant in which preemption
// is programmed in the command stream - confirm against twoCommandListCommandPreemptionTest.
HWTEST2_F(CommandQueueExecuteCommandLists, GivenCmdListsWithDifferentPreemptionModesWhenExecutingThenQueuePreemptionIsSwitchedAndStateSipProgrammedOnce, IsAtLeastSkl) {
    constexpr bool scenarioSelector = false;
    twoCommandListCommandPreemptionTest<FamilyType>(scenarioSelector);
}
719
// Runs the shared two-command-list preemption scenario with the helper's boolean argument set to true.
// NOTE(review): judging by the test names, true presumably selects the variant in which no command-stream
// preemption programming is required - confirm against twoCommandListCommandPreemptionTest.
HWTEST2_F(CommandQueueExecuteCommandLists, GivenCmdListsWithDifferentPreemptionModesWhenNoCmdStreamPreemptionRequiredThenNoCmdStreamProgrammingAndStateSipProgrammedOnce, IsAtLeastSkl) {
    constexpr bool scenarioSelector = true;
    twoCommandListCommandPreemptionTest<FamilyType>(scenarioSelector);
}
723
724 struct CommandQueueExecuteCommandListSWTagsTests : public Test<DeviceFixture> {
SetUpL0::ult::CommandQueueExecuteCommandListSWTagsTests725 void SetUp() override {
726 DebugManager.flags.EnableSWTags.set(true);
727 DeviceFixture::SetUp();
728
729 ze_result_t returnValue;
730 commandLists[0] = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle();
731 ASSERT_NE(nullptr, commandLists[0]);
732 EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
733
734 ze_command_queue_desc_t desc = {};
735 commandQueue = whitebox_cast(CommandQueue::create(productFamily,
736 device,
737 neoDevice->getDefaultEngine().commandStreamReceiver,
738 &desc,
739 false,
740 false,
741 returnValue));
742 EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
743 ASSERT_NE(nullptr, commandQueue->commandStream);
744 }
745
TearDownL0::ult::CommandQueueExecuteCommandListSWTagsTests746 void TearDown() override {
747 commandQueue->destroy();
748
749 for (auto i = 0u; i < numCommandLists; i++) {
750 auto commandList = CommandList::fromHandle(commandLists[i]);
751 commandList->destroy();
752 }
753
754 DeviceFixture::TearDown();
755 }
756
757 DebugManagerStateRestore dbgRestorer;
758 const static uint32_t numCommandLists = 1;
759 ze_command_list_handle_t commandLists[numCommandLists];
760 L0::ult::CommandQueue *commandQueue;
761 };
762
// With software tags enabled, executing a command list must emit at least two MI_STORE_DATA_IMM commands:
// the first targets the tags manager's BXML heap allocation, the second its SW tag heap allocation.
HWTEST_F(CommandQueueExecuteCommandListSWTagsTests, givenEnableSWTagsWhenExecutingCommandListThenHeapAddressesAreInserted) {
    using PARSE = typename FamilyType::PARSE;
    using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;

    const auto streamUsedBefore = commandQueue->commandStream->getUsed();

    const auto executeResult = commandQueue->executeCommandLists(1, commandLists, nullptr, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, executeResult);

    const auto streamUsedAfter = commandQueue->commandStream->getUsed();
    ASSERT_GT(streamUsedAfter, streamUsedBefore);

    // Parse the whole stream from its base up to the current used mark.
    GenCmdList parsedCommands;
    auto streamBase = commandQueue->commandStream->getCpuBase();
    ASSERT_TRUE(PARSE::parseCommandBuffer(parsedCommands, streamBase, streamUsedAfter));

    auto storeDataImmCmds = findAll<MI_STORE_DATA_IMM *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_LE(2u, storeDataImmCmds.size());

    auto bxmlHeapSdi = genCmdCast<MI_STORE_DATA_IMM *>(*storeDataImmCmds[0]);
    auto swTagHeapSdi = genCmdCast<MI_STORE_DATA_IMM *>(*storeDataImmCmds[1]);

    auto &tagsManager = neoDevice->getRootDeviceEnvironment().tagsManager;
    EXPECT_EQ(bxmlHeapSdi->getAddress(), tagsManager->getBXMLHeapAllocation()->getGpuAddress());
    EXPECT_EQ(swTagHeapSdi->getAddress(), tagsManager->getSWTagHeapAllocation()->getGpuAddress());
}
787
// With software tags enabled and a command list whose preemption mode is forced to Disabled,
// executing the list must leave a PipeControlReason tag in the queue's command stream.
// The tag is recognized as two consecutive entries of the MI_NOOP list found in the stream:
//   1. a no-op whose identification number equals the PipeControlReason marker id and whose
//      identification-number register write enable is set,
//   2. a no-op with bit 21 of the identification number set and the write enable cleared.
HWTEST_F(CommandQueueExecuteCommandListSWTagsTests, givenEnableSWTagsAndCommandListWithDifferentPreemtpionWhenExecutingCommandListThenPipeControlReasonTagIsInserted) {
    using MI_NOOP = typename FamilyType::MI_NOOP;
    using PARSE = typename FamilyType::PARSE;

    whitebox_cast(CommandList::fromHandle(commandLists[0]))->commandListPreemptionMode = PreemptionMode::Disabled;
    auto usedSpaceBefore = commandQueue->commandStream->getUsed();

    auto result = commandQueue->executeCommandLists(1, commandLists, nullptr, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto usedSpaceAfter = commandQueue->commandStream->getUsed();
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

    GenCmdList cmdList;
    ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter));

    auto noops = findAll<MI_NOOP *>(cmdList.begin(), cmdList.end());
    ASSERT_LE(2u, noops.size());

    const auto markerNoopId = NEO::SWTags::BaseTag::getMarkerNoopID(SWTags::OpCode::PipeControlReason);

    // Walk adjacent pairs by index. The bound "i + 1 < size" guarantees that the second element of the
    // pair always exists, so the loop never advances an iterator past the end of the list.
    bool tagFound = false;
    for (size_t i = 0; i + 1 < noops.size() && !tagFound; ++i) {
        auto markerNoop = genCmdCast<MI_NOOP *>(*noops[i]);
        const bool isMarker = markerNoopId == markerNoop->getIdentificationNumber() &&
                              markerNoop->getIdentificationNumberRegisterWriteEnable() == true;
        if (!isMarker) {
            continue;
        }

        auto followingNoop = genCmdCast<MI_NOOP *>(*noops[i + 1]);
        tagFound = (followingNoop->getIdentificationNumber() & (1u << 21)) != 0 &&
                   followingNoop->getIdentificationNumberRegisterWriteEnable() == false;
    }
    EXPECT_TRUE(tagFound);
}
824
825 template <typename GfxFamily>
findPartitionRegister(GenCmdList & cmdList,bool expectToFind)826 void findPartitionRegister(GenCmdList &cmdList, bool expectToFind) {
827 using MI_LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM;
828 using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
829
830 auto loadRegisterMemList = findAll<MI_LOAD_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
831 bool wparidRegisterFound = false;
832 for (size_t i = 0; i < loadRegisterMemList.size(); i++) {
833 auto loadRegMem = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(*loadRegisterMemList[i]);
834 if (NEO::PartitionRegisters<GfxFamily>::wparidCCSOffset == loadRegMem->getRegisterAddress()) {
835 wparidRegisterFound = true;
836 }
837 }
838
839 auto loadRegisterImmList = findAll<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
840 bool offsetRegisterFound = false;
841 for (size_t i = 0; i < loadRegisterImmList.size(); i++) {
842 auto loadRegImm = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(*loadRegisterImmList[i]);
843 if (NEO::PartitionRegisters<GfxFamily>::addressOffsetCCSOffset == loadRegImm->getRegisterOffset()) {
844 offsetRegisterFound = true;
845 }
846 }
847
848 if (expectToFind) {
849 EXPECT_TRUE(wparidRegisterFound);
850 EXPECT_TRUE(offsetRegisterFound);
851 } else {
852 EXPECT_FALSE(wparidRegisterFound);
853 EXPECT_FALSE(offsetRegisterFound);
854 }
855 }
856
// Executes the fixture's command lists three times on a queue that reports a partition count of 2 and checks:
//  - the 1st submission contains the partition register programming (see findPartitionRegister),
//  - the 2nd and 3rd submissions do not contain it,
//  - the fence's partition count changes from 1 to 2 once the queue has executed with the fence,
//  - the 3rd submission is not larger than the 2nd,
//  - the 3rd submission contains exactly two PIPE_CONTROLs with the "write immediate data" post-sync
//    operation, each with the workload partition id offset enabled.
// Every submission is parsed on its own: parsing starts at the stream offset recorded before the
// execute call and covers only the bytes added by that call.
HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists, givenMultiplePartitionCountWhenExecutingCmdListThenExpectMmioProgrammingAndCorrectEstimation, IsAtLeastXeHpCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;
    using PARSE = typename FamilyType::PARSE;

    ze_command_queue_desc_t desc{};
    desc.ordinal = 0u;
    desc.index = 0u;
    desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
    desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    ze_result_t returnValue;

    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
                                                           device,
                                                           device->getNEODevice()->getDefaultEngine().commandStreamReceiver,
                                                           &desc,
                                                           false,
                                                           false,
                                                           returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    // Stop here if creation failed; everything below dereferences the queue.
    ASSERT_NE(nullptr, commandQueue);
    EXPECT_EQ(2u, commandQueue->partitionCount);
    ASSERT_NE(nullptr, commandQueue->commandStream);

    auto &commandStreamReceiver = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
    if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) {
        commandStreamReceiver->createPreemptionAllocation();
    }

    ze_fence_desc_t fenceDesc{};
    auto fence = whitebox_cast(Fence::create(commandQueue, &fenceDesc));
    ASSERT_NE(nullptr, fence);
    EXPECT_EQ(1u, fence->partitionCount);
    ze_fence_handle_t fenceHandle = fence->toHandle();

    // 1st execute call: the partition registers are expected in this submission.
    auto usedSpaceBefore = commandQueue->commandStream->getUsed();
    auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true);
    // Check the status of the execute call itself, not the status left over from queue creation.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    auto usedSpaceAfter = commandQueue->commandStream->getUsed();
    // Guards the unsigned subtraction used for the parse length below.
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

    GenCmdList cmdList;
    ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList,
                                          ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore),
                                          usedSpaceAfter - usedSpaceBefore));
    findPartitionRegister<FamilyType>(cmdList, true);

    // 2nd execute call: the partition registers are not expected to be programmed again.
    usedSpaceBefore = commandQueue->commandStream->getUsed();
    result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    usedSpaceAfter = commandQueue->commandStream->getUsed();
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
    size_t cmdBufferSizeWithoutMmioProgramming = usedSpaceAfter - usedSpaceBefore;
    EXPECT_EQ(2u, fence->partitionCount);

    // Give the command lists themselves a partition count of 2 for the 3rd execute call.
    for (auto i = 0u; i < numCommandLists; i++) {
        auto commandList = CommandList::fromHandle(commandLists[i]);
        commandList->partitionCount = 2;
    }

    cmdList.clear();
    ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList,
                                          ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore),
                                          usedSpaceAfter - usedSpaceBefore));
    findPartitionRegister<FamilyType>(cmdList, false);

    // 3rd execute call: must not be larger than the 2nd and must not program the partition registers.
    usedSpaceBefore = commandQueue->commandStream->getUsed();
    result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    usedSpaceAfter = commandQueue->commandStream->getUsed();
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
    size_t cmdBufferSizeWithMmioProgramming = usedSpaceAfter - usedSpaceBefore;
    EXPECT_EQ(2u, fence->partitionCount);

    size_t expectedSizeWithMmioProgramming = cmdBufferSizeWithoutMmioProgramming;
    EXPECT_GE(expectedSizeWithMmioProgramming, cmdBufferSizeWithMmioProgramming);

    cmdList.clear();
    ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList,
                                          ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore),
                                          usedSpaceAfter - usedSpaceBefore));
    findPartitionRegister<FamilyType>(cmdList, false);

    auto pipeControlList = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());

    // Count the post-sync (write immediate data) PIPE_CONTROLs of the 3rd submission.
    uint32_t foundPostSyncPipeControl = 0u;
    for (auto &pipeControlIterator : pipeControlList) {
        auto pipeControl = reinterpret_cast<PIPE_CONTROL *>(*pipeControlIterator);
        if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            EXPECT_TRUE(pipeControl->getWorkloadPartitionIdOffsetEnable());
            foundPostSyncPipeControl++;
        }
    }
    EXPECT_EQ(2u, foundPostSyncPipeControl);

    fence->destroy();
    commandQueue->destroy();
}
950
951 } // namespace ult
952 } // namespace L0
953