1 /*
2  * Copyright (C) 2020-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #include "shared/source/command_container/implicit_scaling.h"
9 #include "shared/source/command_stream/command_stream_receiver_hw.h"
10 #include "shared/source/command_stream/preemption.h"
11 #include "shared/source/utilities/software_tags_manager.h"
12 #include "shared/test/common/cmd_parse/gen_cmd_parse.h"
13 #include "shared/test/common/helpers/unit_test_helper.h"
14 #include "shared/test/common/mocks/ult_device_factory.h"
15 #include "shared/test/common/test_macros/test.h"
16 
17 #include "level_zero/core/source/fence/fence.h"
18 #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
19 #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
20 #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
21 #include "level_zero/core/test/unit_tests/mocks/mock_fence.h"
22 #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
23 #include "level_zero/core/test/unit_tests/mocks/mock_module.h"
24 
25 namespace L0 {
26 namespace ult {
27 
28 struct CommandQueueExecuteCommandLists : public Test<DeviceFixture> {
SetUpL0::ult::CommandQueueExecuteCommandLists29     void SetUp() override {
30         DeviceFixture::SetUp();
31 
32         ze_result_t returnValue;
33         commandLists[0] = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle();
34         ASSERT_NE(nullptr, commandLists[0]);
35         EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
36 
37         commandLists[1] = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle();
38         ASSERT_NE(nullptr, commandLists[1]);
39         EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
40     }
41 
TearDownL0::ult::CommandQueueExecuteCommandLists42     void TearDown() override {
43         for (auto i = 0u; i < numCommandLists; i++) {
44             auto commandList = CommandList::fromHandle(commandLists[i]);
45             commandList->destroy();
46         }
47 
48         DeviceFixture::TearDown();
49     }
50 
51     template <typename FamilyType>
52     void twoCommandListCommandPreemptionTest(bool preemptionCmdProgramming);
53 
54     const static uint32_t numCommandLists = 2;
55     ze_command_list_handle_t commandLists[numCommandLists];
56 };
57 
58 struct MultiDeviceCommandQueueExecuteCommandLists : public Test<MultiDeviceFixture> {
SetUpL0::ult::MultiDeviceCommandQueueExecuteCommandLists59     void SetUp() override {
60         DebugManager.flags.EnableWalkerPartition.set(1);
61         numRootDevices = 1u;
62         MultiDeviceFixture::SetUp();
63 
64         uint32_t deviceCount = 1;
65         ze_device_handle_t deviceHandle;
66         driverHandle->getDevice(&deviceCount, &deviceHandle);
67         device = Device::fromHandle(deviceHandle);
68         ASSERT_NE(nullptr, device);
69 
70         ze_result_t returnValue;
71         commandLists[0] = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle();
72         ASSERT_NE(nullptr, commandLists[0]);
73         EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
74 
75         commandLists[1] = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle();
76         ASSERT_NE(nullptr, commandLists[1]);
77         EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
78     }
79 
TearDownL0::ult::MultiDeviceCommandQueueExecuteCommandLists80     void TearDown() override {
81         for (auto i = 0u; i < numCommandLists; i++) {
82             auto commandList = CommandList::fromHandle(commandLists[i]);
83             commandList->destroy();
84         }
85 
86         MultiDeviceFixture::TearDown();
87     }
88 
89     L0::Device *device = nullptr;
90     const static uint32_t numCommandLists = 2;
91     ze_command_list_handle_t commandLists[numCommandLists];
92 };
93 
// Marks both fixture command lists as requiring uncached MOCS on the queue and
// checks that executing them on a freshly created queue reports success.
HWTEST_F(CommandQueueExecuteCommandLists, whenACommandListExecutedRequiresUncachedMOCSThenSuccessisReturned) {
    const ze_command_queue_desc_t desc{};
    ze_result_t returnValue = ZE_RESULT_ERROR_UNINITIALIZED;
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
                                                           device,
                                                           neoDevice->getDefaultEngine().commandStreamReceiver,
                                                           &desc,
                                                           false,
                                                           false,
                                                           returnValue));
    // Check the queue itself before reaching through it for the command stream.
    ASSERT_NE(nullptr, commandQueue);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, commandQueue->commandStream);

    auto commandList1 = whitebox_cast(CommandList::fromHandle(commandLists[0]));
    auto commandList2 = whitebox_cast(CommandList::fromHandle(commandLists[1]));
    commandList1->requiresQueueUncachedMocs = true;
    commandList2->requiresQueueUncachedMocs = true;

    auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
    // Non-fatal check: the queue must still be destroyed when execution fails.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    commandQueue->destroy();
}
118 
// Executes the fixture command lists and walks the queue's command stream: for
// every command list there must be an MI_BATCH_BUFFER_START that is second-level,
// uses the PPGTT address space and points at the list's first command buffer
// allocation; an MI_BATCH_BUFFER_END must follow the last of them.
HWTEST_F(CommandQueueExecuteCommandLists, whenASecondLevelBatchBufferPerCommandListAddedThenProperSizeExpected) {
    using PARSE = typename FamilyType::PARSE;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

    ze_result_t createResult;
    const ze_command_queue_desc_t queueDesc{};
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
                                                           device,
                                                           neoDevice->getDefaultEngine().commandStreamReceiver,
                                                           &queueDesc,
                                                           false,
                                                           false,
                                                           createResult));
    ASSERT_NE(nullptr, commandQueue->commandStream);

    const auto usedBeforeExecute = commandQueue->commandStream->getUsed();

    auto executeResult = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
    ASSERT_EQ(ZE_RESULT_SUCCESS, executeResult);

    const auto usedAfterExecute = commandQueue->commandStream->getUsed();
    ASSERT_GT(usedAfterExecute, usedBeforeExecute);

    // Decode everything the queue has written so far, starting at the stream base.
    GenCmdList parsedCommands;
    auto streamBase = ptrOffset(commandQueue->commandStream->getCpuBase(), 0);
    ASSERT_TRUE(PARSE::parseCommandBuffer(parsedCommands, streamBase, usedAfterExecute));

    auto cursor = parsedCommands.begin();
    for (auto listHandle : commandLists) {
        auto firstAllocation = CommandList::fromHandle(listHandle)->commandContainer.getCmdBufferAllocations()[0];

        cursor = find<MI_BATCH_BUFFER_START *>(cursor, parsedCommands.end());
        ASSERT_NE(parsedCommands.end(), cursor);

        auto batchBufferStart = genCmdCast<MI_BATCH_BUFFER_START *>(*cursor);
        // Step past this command so the next search finds the following one.
        ++cursor;
        ASSERT_NE(nullptr, batchBufferStart);
        EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH,
                  batchBufferStart->getSecondLevelBatchBuffer());
        EXPECT_EQ(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT,
                  batchBufferStart->getAddressSpaceIndicator());
        EXPECT_EQ(firstAllocation->getGpuAddress(), batchBufferStart->getBatchBufferStartAddress());
    }

    auto batchBufferEnd = find<MI_BATCH_BUFFER_END *>(cursor, parsedCommands.end());
    EXPECT_NE(parsedCommands.end(), batchBufferEnd);

    commandQueue->destroy();
}
170 
HWTEST2_F(CommandQueueExecuteCommandLists,whenUsingFenceThenExpectEndingPipeControlUpdatingFenceAllocation,IsGen9)171 HWTEST2_F(CommandQueueExecuteCommandLists, whenUsingFenceThenExpectEndingPipeControlUpdatingFenceAllocation, IsGen9) {
172     using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
173     using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;
174     using PARSE = typename FamilyType::PARSE;
175 
176     ze_command_queue_desc_t desc{};
177     ze_result_t returnValue;
178     desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
179     auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
180                                                            device,
181                                                            neoDevice->getDefaultEngine().commandStreamReceiver,
182                                                            &desc,
183                                                            false,
184                                                            false,
185                                                            returnValue));
186     ASSERT_NE(nullptr, commandQueue->commandStream);
187 
188     ze_fence_desc_t fenceDesc{};
189     auto fence = whitebox_cast(Fence::create(commandQueue, &fenceDesc));
190     ASSERT_NE(nullptr, fence);
191 
192     ze_fence_handle_t fenceHandle = fence->toHandle();
193 
194     auto usedSpaceBefore = commandQueue->commandStream->getUsed();
195 
196     auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true);
197 
198     ASSERT_EQ(ZE_RESULT_SUCCESS, result);
199 
200     auto usedSpaceAfter = commandQueue->commandStream->getUsed();
201     ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
202 
203     GenCmdList cmdList;
204     ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList,
205                                           ptrOffset(commandQueue->commandStream->getCpuBase(), 0),
206                                           usedSpaceAfter));
207 
208     //on some platforms Fence update requires more than single PIPE_CONTROL, Fence tag update should be in the third to last command in SKL
209     auto pipeControls = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
210     //we require at least one PIPE_CONTROL
211     ASSERT_LE(1u, pipeControls.size());
212     PIPE_CONTROL *fenceUpdate = genCmdCast<PIPE_CONTROL *>(*pipeControls[pipeControls.size() - 3]);
213 
214     EXPECT_EQ(fence->getGpuAddress(), NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*fenceUpdate));
215 
216     EXPECT_EQ(POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, fenceUpdate->getPostSyncOperation());
217 
218     uint64_t fenceImmData = Fence::STATE_SIGNALED;
219     EXPECT_EQ(fenceImmData, fenceUpdate->getImmediateData());
220 
221     fence->destroy();
222     commandQueue->destroy();
223 }
224 
// Executes the fixture command lists and checks that the last PIPE_CONTROL in the
// queue's command stream is a write-immediate-data post-sync operation whose
// immediate data equals the task count peeked from the default engine's CSR.
HWTEST_F(CommandQueueExecuteCommandLists, whenExecutingCommandListsThenEndingPipeControlCommandIsExpected) {
    using PARSE = typename FamilyType::PARSE;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;

    ze_result_t createResult;
    const ze_command_queue_desc_t queueDesc{};
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
                                                           device,
                                                           neoDevice->getDefaultEngine().commandStreamReceiver,
                                                           &queueDesc,
                                                           false,
                                                           false,
                                                           createResult));
    ASSERT_NE(nullptr, commandQueue->commandStream);

    const auto usedBeforeExecute = commandQueue->commandStream->getUsed();

    auto executeResult = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
    ASSERT_EQ(ZE_RESULT_SUCCESS, executeResult);

    const auto usedAfterExecute = commandQueue->commandStream->getUsed();
    ASSERT_GT(usedAfterExecute, usedBeforeExecute);

    GenCmdList parsedCommands;
    auto streamBase = ptrOffset(commandQueue->commandStream->getCpuBase(), 0);
    ASSERT_TRUE(PARSE::parseCommandBuffer(parsedCommands, streamBase, usedAfterExecute));

    // The PIPE_CONTROL carrying the post-sync write is expected to be the final one.
    auto foundPipeControls = findAll<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    // At least one PIPE_CONTROL has to be present for the last-element lookup.
    ASSERT_LE(1u, foundPipeControls.size());
    const auto lastIndex = foundPipeControls.size() - 1;
    PIPE_CONTROL *lastPipeControl = genCmdCast<PIPE_CONTROL *>(*foundPipeControls[lastIndex]);

    EXPECT_EQ(POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, lastPipeControl->getPostSyncOperation());

    uint64_t expectedTaskCount = neoDevice->getDefaultEngine().commandStreamReceiver->peekTaskCount();
    EXPECT_EQ(expectedTaskCount, lastPipeControl->getImmediateData());

    commandQueue->destroy();
}
267 
268 using CommandQueueExecuteSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
HWTEST2_F(CommandQueueExecuteCommandLists,givenCommandQueueHaving2CommandListsThenMVSIsProgrammedWithMaxPTSS,CommandQueueExecuteSupport)269 HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandQueueHaving2CommandListsThenMVSIsProgrammedWithMaxPTSS, CommandQueueExecuteSupport) {
270     using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
271     using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
272     using PARSE = typename FamilyType::PARSE;
273     ze_command_queue_desc_t desc = {};
274     ze_result_t returnValue;
275     auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
276                                                            device,
277                                                            neoDevice->getDefaultEngine().commandStreamReceiver,
278                                                            &desc,
279                                                            false,
280                                                            false,
281                                                            returnValue));
282 
283     CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(512u);
284     CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(1024u);
285 
286     ASSERT_NE(nullptr, commandQueue->commandStream);
287     auto usedSpaceBefore = commandQueue->commandStream->getUsed();
288 
289     auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
290     ASSERT_EQ(ZE_RESULT_SUCCESS, result);
291     EXPECT_EQ(1024u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());
292 
293     auto usedSpaceAfter = commandQueue->commandStream->getUsed();
294     ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
295 
296     GenCmdList cmdList;
297     ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList,
298                                           ptrOffset(commandQueue->commandStream->getCpuBase(), 0),
299                                           usedSpaceAfter));
300 
301     auto mediaVfeStates = findAll<MEDIA_VFE_STATE *>(cmdList.begin(), cmdList.end());
302     auto GSBAStates = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
303     // We should have only 1 state added
304     ASSERT_EQ(1u, mediaVfeStates.size());
305     ASSERT_EQ(1u, GSBAStates.size());
306 
307     CommandList::fromHandle(commandLists[0])->reset();
308     CommandList::fromHandle(commandLists[1])->reset();
309     CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(2048u);
310     CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(1024u);
311 
312     ASSERT_NE(nullptr, commandQueue->commandStream);
313     usedSpaceBefore = commandQueue->commandStream->getUsed();
314 
315     result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
316     ASSERT_EQ(ZE_RESULT_SUCCESS, result);
317     EXPECT_EQ(2048u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());
318 
319     usedSpaceAfter = commandQueue->commandStream->getUsed();
320     ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
321 
322     GenCmdList cmdList1;
323     ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList1,
324                                           ptrOffset(commandQueue->commandStream->getCpuBase(), 0),
325                                           usedSpaceAfter));
326 
327     mediaVfeStates = findAll<MEDIA_VFE_STATE *>(cmdList1.begin(), cmdList1.end());
328     GSBAStates = findAll<STATE_BASE_ADDRESS *>(cmdList1.begin(), cmdList1.end());
329     // We should have 2 states added
330     ASSERT_EQ(2u, mediaVfeStates.size());
331     ASSERT_EQ(2u, GSBAStates.size());
332 
333     commandQueue->destroy();
334 }
335 
// For both values of the last boolean flag passed to CommandQueue::create
// (`flagInternal`): executes the fixture command lists and checks that a
// STATE_SIP pointing at the SIP kernel allocation is present in the queue's
// command stream when the device preemption mode is MidThread, and absent otherwise.
HWTEST_F(CommandQueueExecuteCommandLists, givenMidThreadPreemptionWhenCommandsAreExecutedThenStateSipIsAdded) {
    using STATE_SIP = typename FamilyType::STATE_SIP;
    using PARSE = typename FamilyType::PARSE;

    ze_command_queue_desc_t desc{};
    desc.ordinal = 0u;
    desc.index = 0u;
    desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;

    std::array<bool, 2> testedInternalFlags = {true, false};

    for (auto flagInternal : testedInternalFlags) {
        ze_result_t returnValue;
        auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
                                                               device,
                                                               neoDevice->getDefaultEngine().commandStreamReceiver,
                                                               &desc,
                                                               false,
                                                               flagInternal,
                                                               returnValue));
        EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

        ASSERT_NE(nullptr, commandQueue->commandStream);

        auto usedSpaceBefore = commandQueue->commandStream->getUsed();

        auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
        ASSERT_EQ(ZE_RESULT_SUCCESS, result);

        auto usedSpaceAfter = commandQueue->commandStream->getUsed();
        ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

        GenCmdList cmdList;
        ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter));

        auto itorSip = find<STATE_SIP *>(cmdList.begin(), cmdList.end());

        auto preemptionMode = neoDevice->getPreemptionMode();
        if (preemptionMode == NEO::PreemptionMode::MidThread) {
            // Fatal assertion: the iterator is dereferenced right below, which
            // must not happen when it equals end().
            ASSERT_NE(cmdList.end(), itorSip);

            auto sipAllocation = SipKernel::getSipKernel(*neoDevice).getSipAllocation();
            STATE_SIP *stateSipCmd = reinterpret_cast<STATE_SIP *>(*itorSip);
            EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), stateSipCmd->getSystemInstructionPointer());
        } else {
            EXPECT_EQ(cmdList.end(), itorSip);
        }
        commandQueue->destroy();
    }
}
386 
HWTEST2_F(CommandQueueExecuteCommandLists,givenMidThreadPreemptionWhenCommandsAreExecutedTwoTimesThenStateSipIsAddedOnlyTheFirstTime,IsAtLeastSkl)387 HWTEST2_F(CommandQueueExecuteCommandLists, givenMidThreadPreemptionWhenCommandsAreExecutedTwoTimesThenStateSipIsAddedOnlyTheFirstTime, IsAtLeastSkl) {
388     using STATE_SIP = typename FamilyType::STATE_SIP;
389     using PARSE = typename FamilyType::PARSE;
390 
391     ze_command_queue_desc_t desc{};
392     desc.ordinal = 0u;
393     desc.index = 0u;
394     desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
395 
396     std::array<bool, 2> testedInternalFlags = {true, false};
397 
398     for (auto flagInternal : testedInternalFlags) {
399         ze_result_t returnValue;
400         auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
401                                                                device,
402                                                                neoDevice->getDefaultEngine().commandStreamReceiver,
403                                                                &desc,
404                                                                false,
405                                                                flagInternal,
406                                                                returnValue));
407         EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
408 
409         ASSERT_NE(nullptr, commandQueue->commandStream);
410 
411         auto usedSpaceBefore = commandQueue->commandStream->getUsed();
412 
413         auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
414         ASSERT_EQ(ZE_RESULT_SUCCESS, result);
415 
416         result = commandQueue->synchronize(0);
417         ASSERT_EQ(ZE_RESULT_SUCCESS, result);
418 
419         auto usedSpaceAfter = commandQueue->commandStream->getUsed();
420         ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
421 
422         GenCmdList cmdList;
423         ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter));
424 
425         auto itorSip = find<STATE_SIP *>(cmdList.begin(), cmdList.end());
426 
427         auto preemptionMode = neoDevice->getPreemptionMode();
428         if (preemptionMode == NEO::PreemptionMode::MidThread) {
429             EXPECT_NE(cmdList.end(), itorSip);
430 
431             auto sipAllocation = SipKernel::getSipKernel(*neoDevice).getSipAllocation();
432             STATE_SIP *stateSipCmd = reinterpret_cast<STATE_SIP *>(*itorSip);
433             EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), stateSipCmd->getSystemInstructionPointer());
434         } else {
435             EXPECT_EQ(cmdList.end(), itorSip);
436         }
437 
438         result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
439         ASSERT_EQ(ZE_RESULT_SUCCESS, result);
440 
441         result = commandQueue->synchronize(0);
442         ASSERT_EQ(ZE_RESULT_SUCCESS, result);
443 
444         auto usedSpaceAfterSecondExec = commandQueue->commandStream->getUsed();
445         GenCmdList cmdList2;
446         ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList2, ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceAfter), usedSpaceAfterSecondExec));
447 
448         itorSip = find<STATE_SIP *>(cmdList2.begin(), cmdList2.end());
449         EXPECT_EQ(cmdList2.end(), itorSip);
450 
451         // No preemption reprogramming
452         auto secondExecMmioCount = countMmio<FamilyType>(cmdList2.begin(), cmdList2.end(), 0x2580u);
453         EXPECT_EQ(0u, secondExecMmioCount);
454 
455         commandQueue->destroy();
456     }
457 }
458 
HWTEST2_F(CommandQueueExecuteCommandLists,givenCommandListsWithCooperativeAndNonCooperativeKernelsWhenExecuteCommandListsIsCalledThenErrorIsReturned,IsAtLeastSkl)459 HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandListsWithCooperativeAndNonCooperativeKernelsWhenExecuteCommandListsIsCalledThenErrorIsReturned, IsAtLeastSkl) {
460     ze_command_queue_desc_t desc = {};
461     NEO::CommandStreamReceiver *csr;
462     device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);
463 
464     auto pCommandQueue = new MockCommandQueueHw<gfxCoreFamily>{device, csr, &desc};
465     pCommandQueue->initialize(false, false);
466 
467     Mock<::L0::Kernel> kernel;
468     auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
469     kernel.module = pMockModule.get();
470 
471     ze_group_count_t threadGroupDimensions{1, 1, 1};
472     auto pCommandListWithCooperativeKernels = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
473     pCommandListWithCooperativeKernels->initialize(device, NEO::EngineGroupType::Compute, 0u);
474     pCommandListWithCooperativeKernels->appendLaunchKernelWithParams(&kernel, &threadGroupDimensions, nullptr, false, false, true);
475 
476     auto pCommandListWithNonCooperativeKernels = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
477     pCommandListWithNonCooperativeKernels->initialize(device, NEO::EngineGroupType::Compute, 0u);
478     pCommandListWithNonCooperativeKernels->appendLaunchKernelWithParams(&kernel, &threadGroupDimensions, nullptr, false, false, false);
479 
480     {
481         ze_command_list_handle_t commandLists[] = {pCommandListWithCooperativeKernels->toHandle(),
482                                                    pCommandListWithNonCooperativeKernels->toHandle()};
483         auto result = pCommandQueue->executeCommandLists(2, commandLists, nullptr, false);
484         EXPECT_EQ(ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE, result);
485     }
486     {
487         ze_command_list_handle_t commandLists[] = {pCommandListWithNonCooperativeKernels->toHandle(),
488                                                    pCommandListWithCooperativeKernels->toHandle()};
489         auto result = pCommandQueue->executeCommandLists(2, commandLists, nullptr, false);
490         EXPECT_EQ(ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE, result);
491     }
492 
493     DebugManagerStateRestore restorer;
494     DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1);
495     {
496         ze_command_list_handle_t commandLists[] = {pCommandListWithCooperativeKernels->toHandle(),
497                                                    pCommandListWithNonCooperativeKernels->toHandle()};
498         auto result = pCommandQueue->executeCommandLists(2, commandLists, nullptr, false);
499         EXPECT_EQ(ZE_RESULT_SUCCESS, result);
500     }
501     {
502         ze_command_list_handle_t commandLists[] = {pCommandListWithNonCooperativeKernels->toHandle(),
503                                                    pCommandListWithCooperativeKernels->toHandle()};
504         auto result = pCommandQueue->executeCommandLists(2, commandLists, nullptr, false);
505         EXPECT_EQ(ZE_RESULT_SUCCESS, result);
506     }
507     pCommandQueue->destroy();
508 }
509 
HWTEST2_F(CommandQueueExecuteCommandLists,givenCommandListWithCooperativeKernelsWhenExecuteCommandListsIsCalledThenCorrectBatchBufferIsSubmitted,IsAtLeastXeHpCore)510 HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandListWithCooperativeKernelsWhenExecuteCommandListsIsCalledThenCorrectBatchBufferIsSubmitted, IsAtLeastXeHpCore) {
511     struct MockCsr : NEO::CommandStreamReceiverHw<FamilyType> {
512         using NEO::CommandStreamReceiverHw<FamilyType>::CommandStreamReceiverHw;
513         int submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override {
514             useSingleSubdeviceValue = batchBuffer.useSingleSubdevice;
515             submitBatchBufferCalled++;
516             return NEO::CommandStreamReceiver::submitBatchBuffer(batchBuffer, allocationsForResidency);
517         }
518         bool useSingleSubdeviceValue = false;
519         uint32_t submitBatchBufferCalled = 0;
520     };
521 
522     NEO::UltDeviceFactory deviceFactory{1, 4};
523     auto pNeoDevice = deviceFactory.rootDevices[0];
524 
525     ze_command_queue_desc_t desc = {};
526     MockCsr *pMockCsr = new MockCsr{*pNeoDevice->getExecutionEnvironment(), pNeoDevice->getRootDeviceIndex(), pNeoDevice->getDeviceBitfield()};
527     pNeoDevice->resetCommandStreamReceiver(pMockCsr);
528 
529     Mock<L0::DeviceImp> device{pNeoDevice, pNeoDevice->getExecutionEnvironment()};
530     auto pCommandQueue = new MockCommandQueueHw<gfxCoreFamily>{&device, pMockCsr, &desc};
531     pCommandQueue->initialize(false, false);
532 
533     Mock<::L0::Kernel> kernel;
534     auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(&device, nullptr));
535     kernel.module = pMockModule.get();
536 
537     ze_group_count_t threadGroupDimensions{1, 1, 1};
538     auto pCommandListWithCooperativeKernels = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
539     pCommandListWithCooperativeKernels->initialize(&device, NEO::EngineGroupType::Compute, 0u);
540     pCommandListWithCooperativeKernels->appendLaunchKernelWithParams(&kernel, &threadGroupDimensions, nullptr, false, false, true);
541     ze_command_list_handle_t commandListCooperative[] = {pCommandListWithCooperativeKernels->toHandle()};
542     auto result = pCommandQueue->executeCommandLists(1, commandListCooperative, nullptr, false);
543     EXPECT_EQ(ZE_RESULT_SUCCESS, result);
544     EXPECT_EQ(1u, pMockCsr->submitBatchBufferCalled);
545     EXPECT_TRUE(pMockCsr->useSingleSubdeviceValue);
546 
547     auto pCommandListWithNonCooperativeKernels = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
548     pCommandListWithNonCooperativeKernels->initialize(&device, NEO::EngineGroupType::Compute, 0u);
549     pCommandListWithNonCooperativeKernels->appendLaunchKernelWithParams(&kernel, &threadGroupDimensions, nullptr, false, false, false);
550     ze_command_list_handle_t commandListNonCooperative[] = {pCommandListWithNonCooperativeKernels->toHandle()};
551     result = pCommandQueue->executeCommandLists(1, commandListNonCooperative, nullptr, false);
552     EXPECT_EQ(ZE_RESULT_SUCCESS, result);
553     EXPECT_EQ(2u, pMockCsr->submitBatchBufferCalled);
554     EXPECT_FALSE(pMockCsr->useSingleSubdeviceValue);
555 
556     pCommandQueue->destroy();
557 }
558 
559 template <typename FamilyType>
twoCommandListCommandPreemptionTest(bool preemptionCmdProgramming)560 void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool preemptionCmdProgramming) {
561     ze_command_queue_desc_t desc = {};
562     desc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
563     desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
564 
565     ze_result_t returnValue;
566     auto commandQueue = whitebox_cast(CommandQueue::create(
567         productFamily,
568         device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue));
569     ASSERT_NE(nullptr, commandQueue->commandStream);
570     commandQueue->preemptionCmdSyncProgramming = preemptionCmdProgramming;
571     preemptionCmdProgramming = NEO::PreemptionHelper::getRequiredCmdStreamSize<FamilyType>(NEO::PreemptionMode::ThreadGroup, NEO::PreemptionMode::Disabled) > 0u;
572     auto usedSpaceBefore = commandQueue->commandStream->getUsed();
573 
574     auto commandListDisabled = whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
575     commandListDisabled->commandListPreemptionMode = NEO::PreemptionMode::Disabled;
576 
577     auto commandListThreadGroup = whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
578     commandListThreadGroup->commandListPreemptionMode = NEO::PreemptionMode::ThreadGroup;
579 
580     ze_command_list_handle_t commandLists[] = {commandListDisabled->toHandle(),
581                                                commandListThreadGroup->toHandle(),
582                                                commandListDisabled->toHandle()};
583     uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]);
584     auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
585     ASSERT_EQ(ZE_RESULT_SUCCESS, result);
586 
587     result = commandQueue->synchronize(0);
588     ASSERT_EQ(ZE_RESULT_SUCCESS, result);
589 
590     EXPECT_EQ(NEO::PreemptionMode::Disabled, commandQueue->commandQueuePreemptionMode);
591 
592     result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
593     ASSERT_EQ(ZE_RESULT_SUCCESS, result);
594 
595     EXPECT_EQ(NEO::PreemptionMode::Disabled, commandQueue->commandQueuePreemptionMode);
596 
597     auto usedSpaceAfter = commandQueue->commandStream->getUsed();
598     ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
599 
600     GenCmdList cmdList;
601     ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
602         cmdList, commandQueue->commandStream->getCpuBase(), usedSpaceAfter));
603     using STATE_SIP = typename FamilyType::STATE_SIP;
604     using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
605     using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
606     using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
607     using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
608     using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;
609 
610     auto preemptionMode = neoDevice->getPreemptionMode();
611     GenCmdList::iterator itor = cmdList.begin();
612 
613     GenCmdList::iterator itorStateSip = find<STATE_SIP *>(cmdList.begin(), cmdList.end());
614     if (preemptionMode == NEO::PreemptionMode::MidThread) {
615         EXPECT_NE(itorStateSip, cmdList.end());
616 
617         itor = itorStateSip;
618     } else {
619         EXPECT_EQ(itorStateSip, cmdList.end());
620     }
621 
622     MI_LOAD_REGISTER_IMM *lriCmd = nullptr;
623     auto itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
624     if (preemptionCmdProgramming) {
625         EXPECT_NE(itorLri, cmdList.end());
626         //Initial cmdQ preemption
627         lriCmd = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
628         EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset());
629 
630         itor = itorLri;
631     } else {
632         EXPECT_EQ(itorLri, cmdList.end());
633     }
634 
635     uint32_t data = 0;
636     //next should be BB_START to 1st Disabled preemption Cmd List
637     auto itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
638     EXPECT_NE(itorBBStart, cmdList.end());
639     itor = itorBBStart;
640 
641     itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
642     if (preemptionCmdProgramming) {
643         EXPECT_NE(itorLri, cmdList.end());
644 
645         lriCmd = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
646         EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset());
647         data = (1 << 1) | (((1 << 1) | (1 << 2)) << 16);
648         EXPECT_EQ(data, lriCmd->getDataDword());
649 
650         //verify presence of sync PIPE_CONTROL just before LRI switching to thread-group
651         if (commandQueue->preemptionCmdSyncProgramming) {
652             auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
653             EXPECT_NE(itorPipeControl, cmdList.end());
654         }
655 
656         itor = itorLri;
657     } else {
658         EXPECT_EQ(itorLri, cmdList.end());
659     }
660 
661     //start of thread-group command list
662     itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
663     EXPECT_NE(itorBBStart, cmdList.end());
664     itor = itorBBStart;
665 
666     itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
667     if (preemptionCmdProgramming) {
668         EXPECT_NE(itorLri, cmdList.end());
669         lriCmd = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
670         EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset());
671         data = (1 << 2) | (((1 << 1) | (1 << 2)) << 16);
672         EXPECT_EQ(data, lriCmd->getDataDword());
673 
674         //verify presence of sync PIPE_CONTROL just before LRI switching to thread-group
675         if (commandQueue->preemptionCmdSyncProgramming) {
676             auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
677             EXPECT_NE(itorPipeControl, cmdList.end());
678         }
679 
680         itor = itorLri;
681     } else {
682         EXPECT_EQ(itorLri, cmdList.end());
683     }
684 
685     //start of thread-group command list
686     itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
687     EXPECT_NE(itorBBStart, cmdList.end());
688     itor = itorBBStart;
689 
690     // BB end
691     auto itorBBEnd = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
692     EXPECT_NE(itorBBStart, cmdList.end());
693 
694     auto allStateSips = findAll<STATE_SIP *>(cmdList.begin(), cmdList.end());
695     if (preemptionMode == NEO::PreemptionMode::MidThread) {
696         EXPECT_EQ(1u, allStateSips.size());
697     } else {
698         EXPECT_EQ(0u, allStateSips.size());
699     }
700 
701     auto firstExecMmioCount = countMmio<FamilyType>(cmdList.begin(), itorBBEnd, 0x2580u);
702     size_t expectedMmioCount = preemptionCmdProgramming ? 4u : 0u;
703     EXPECT_EQ(expectedMmioCount, firstExecMmioCount);
704 
705     // Count next MMIOs for preemption - only two should be present as last cmdlist from 1st exec
706     // and first cmdlist from 2nd exec has the same mode - cmdQ state should remember it
707     auto secondExecMmioCount = countMmio<FamilyType>(itorBBEnd, cmdList.end(), 0x2580u);
708     expectedMmioCount = preemptionCmdProgramming ? 2u : 0u;
709     EXPECT_EQ(expectedMmioCount, secondExecMmioCount);
710 
711     commandListDisabled->destroy();
712     commandListThreadGroup->destroy();
713     commandQueue->destroy();
714 }
715 
// Runs the shared preemption-switching scenario with its boolean argument cleared.
HWTEST2_F(CommandQueueExecuteCommandLists, GivenCmdListsWithDifferentPreemptionModesWhenExecutingThenQueuePreemptionIsSwitchedAndStateSipProgrammedOnce, IsAtLeastSkl) {
    constexpr bool preemptionCmdProgramming = false;
    twoCommandListCommandPreemptionTest<FamilyType>(preemptionCmdProgramming);
}
719 
// Runs the shared preemption-switching scenario with its boolean argument set.
HWTEST2_F(CommandQueueExecuteCommandLists, GivenCmdListsWithDifferentPreemptionModesWhenNoCmdStreamPreemptionRequiredThenNoCmdStreamProgrammingAndStateSipProgrammedOnce, IsAtLeastSkl) {
    constexpr bool preemptionCmdProgramming = true;
    twoCommandListCommandPreemptionTest<FamilyType>(preemptionCmdProgramming);
}
723 
724 struct CommandQueueExecuteCommandListSWTagsTests : public Test<DeviceFixture> {
SetUpL0::ult::CommandQueueExecuteCommandListSWTagsTests725     void SetUp() override {
726         DebugManager.flags.EnableSWTags.set(true);
727         DeviceFixture::SetUp();
728 
729         ze_result_t returnValue;
730         commandLists[0] = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle();
731         ASSERT_NE(nullptr, commandLists[0]);
732         EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
733 
734         ze_command_queue_desc_t desc = {};
735         commandQueue = whitebox_cast(CommandQueue::create(productFamily,
736                                                           device,
737                                                           neoDevice->getDefaultEngine().commandStreamReceiver,
738                                                           &desc,
739                                                           false,
740                                                           false,
741                                                           returnValue));
742         EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
743         ASSERT_NE(nullptr, commandQueue->commandStream);
744     }
745 
TearDownL0::ult::CommandQueueExecuteCommandListSWTagsTests746     void TearDown() override {
747         commandQueue->destroy();
748 
749         for (auto i = 0u; i < numCommandLists; i++) {
750             auto commandList = CommandList::fromHandle(commandLists[i]);
751             commandList->destroy();
752         }
753 
754         DeviceFixture::TearDown();
755     }
756 
757     DebugManagerStateRestore dbgRestorer;
758     const static uint32_t numCommandLists = 1;
759     ze_command_list_handle_t commandLists[numCommandLists];
760     L0::ult::CommandQueue *commandQueue;
761 };
762 
// With software tags enabled, executing a command list must emit at least two
// MI_STORE_DATA_IMM commands; the first is expected to target the BXML heap
// allocation and the second the SW tag heap allocation of the tags manager.
HWTEST_F(CommandQueueExecuteCommandListSWTagsTests, givenEnableSWTagsWhenExecutingCommandListThenHeapAddressesAreInserted) {
    using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
    using PARSE = typename FamilyType::PARSE;

    auto *queueStream = commandQueue->commandStream;
    const auto usedBeforeExecute = queueStream->getUsed();

    const auto executeResult = commandQueue->executeCommandLists(1, commandLists, nullptr, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, executeResult);

    const auto usedAfterExecute = queueStream->getUsed();
    ASSERT_GT(usedAfterExecute, usedBeforeExecute);

    // Parse the whole stream from its base up to the currently used size.
    GenCmdList parsedCommands;
    ASSERT_TRUE(PARSE::parseCommandBuffer(parsedCommands, queueStream->getCpuBase(), usedAfterExecute));

    auto storeDataCommands = findAll<MI_STORE_DATA_IMM *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_LE(2u, storeDataCommands.size());

    auto bxmlHeapStore = genCmdCast<MI_STORE_DATA_IMM *>(*storeDataCommands[0]);
    auto swTagHeapStore = genCmdCast<MI_STORE_DATA_IMM *>(*storeDataCommands[1]);

    const auto &tagsManager = neoDevice->getRootDeviceEnvironment().tagsManager;
    EXPECT_EQ(bxmlHeapStore->getAddress(), tagsManager->getBXMLHeapAllocation()->getGpuAddress());
    EXPECT_EQ(swTagHeapStore->getAddress(), tagsManager->getSWTagHeapAllocation()->getGpuAddress());
}
787 
// With software tags enabled and the command list preemption mode forced to
// Disabled, executing the list must insert a PipeControlReason tag. The tag is
// recognised as a pair of adjacent MI_NOOPs: a marker NOOP carrying the
// PipeControlReason marker id with register write enabled, followed by a NOOP
// with bit 21 of its identification number set and register write disabled.
// NOTE(review): the meaning of bit 21 is not visible in this file - confirm
// against the SWTags implementation.
HWTEST_F(CommandQueueExecuteCommandListSWTagsTests, givenEnableSWTagsAndCommandListWithDifferentPreemtpionWhenExecutingCommandListThenPipeControlReasonTagIsInserted) {
    using MI_NOOP = typename FamilyType::MI_NOOP;
    using PARSE = typename FamilyType::PARSE;

    whitebox_cast(CommandList::fromHandle(commandLists[0]))->commandListPreemptionMode = PreemptionMode::Disabled;
    auto usedSpaceBefore = commandQueue->commandStream->getUsed();

    auto result = commandQueue->executeCommandLists(1, commandLists, nullptr, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto usedSpaceAfter = commandQueue->commandStream->getUsed();
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

    GenCmdList cmdList;
    ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter));

    auto noops = findAll<MI_NOOP *>(cmdList.begin(), cmdList.end());
    ASSERT_LE(2u, noops.size());

    const auto markerNoopId = NEO::SWTags::BaseTag::getMarkerNoopID(SWTags::OpCode::PipeControlReason);

    // Examine every pair of consecutive NOOPs. Indexing (instead of advancing the
    // iterator inside the condition) keeps the walk inside the container even
    // when the marker is the last NOOP, and never skips a candidate marker.
    bool tagFound = false;
    for (size_t i = 0; i + 1 < noops.size() && !tagFound; ++i) {
        auto markerNoop = genCmdCast<MI_NOOP *>(*noops[i]);
        const bool isMarker = markerNoopId == markerNoop->getIdentificationNumber() &&
                              markerNoop->getIdentificationNumberRegisterWriteEnable() == true;
        if (!isMarker) {
            continue;
        }

        auto followingNoop = genCmdCast<MI_NOOP *>(*noops[i + 1]);
        if ((followingNoop->getIdentificationNumber() & (1 << 21)) &&
            followingNoop->getIdentificationNumberRegisterWriteEnable() == false) {
            tagFound = true;
        }
    }
    EXPECT_TRUE(tagFound);
}
824 
825 template <typename GfxFamily>
findPartitionRegister(GenCmdList & cmdList,bool expectToFind)826 void findPartitionRegister(GenCmdList &cmdList, bool expectToFind) {
827     using MI_LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM;
828     using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
829 
830     auto loadRegisterMemList = findAll<MI_LOAD_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
831     bool wparidRegisterFound = false;
832     for (size_t i = 0; i < loadRegisterMemList.size(); i++) {
833         auto loadRegMem = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(*loadRegisterMemList[i]);
834         if (NEO::PartitionRegisters<GfxFamily>::wparidCCSOffset == loadRegMem->getRegisterAddress()) {
835             wparidRegisterFound = true;
836         }
837     }
838 
839     auto loadRegisterImmList = findAll<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
840     bool offsetRegisterFound = false;
841     for (size_t i = 0; i < loadRegisterImmList.size(); i++) {
842         auto loadRegImm = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(*loadRegisterImmList[i]);
843         if (NEO::PartitionRegisters<GfxFamily>::addressOffsetCCSOffset == loadRegImm->getRegisterOffset()) {
844             offsetRegisterFound = true;
845         }
846     }
847 
848     if (expectToFind) {
849         EXPECT_TRUE(wparidRegisterFound);
850         EXPECT_TRUE(offsetRegisterFound);
851     } else {
852         EXPECT_FALSE(wparidRegisterFound);
853         EXPECT_FALSE(offsetRegisterFound);
854     }
855 }
856 
HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists,givenMultiplePartitionCountWhenExecutingCmdListThenExpectMmioProgrammingAndCorrectEstimation,IsAtLeastXeHpCore)857 HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists, givenMultiplePartitionCountWhenExecutingCmdListThenExpectMmioProgrammingAndCorrectEstimation, IsAtLeastXeHpCore) {
858     using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
859     using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;
860     using PARSE = typename FamilyType::PARSE;
861 
862     ze_command_queue_desc_t desc{};
863     desc.ordinal = 0u;
864     desc.index = 0u;
865     desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
866     desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
867 
868     ze_result_t returnValue;
869 
870     auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
871                                                            device,
872                                                            device->getNEODevice()->getDefaultEngine().commandStreamReceiver,
873                                                            &desc,
874                                                            false,
875                                                            false,
876                                                            returnValue));
877     EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
878     EXPECT_EQ(2u, commandQueue->partitionCount);
879     ASSERT_NE(nullptr, commandQueue->commandStream);
880 
881     auto &commandStreamReceiver = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
882     if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) {
883         commandStreamReceiver->createPreemptionAllocation();
884     }
885 
886     ze_fence_desc_t fenceDesc{};
887     auto fence = whitebox_cast(Fence::create(commandQueue, &fenceDesc));
888     ASSERT_NE(nullptr, fence);
889     EXPECT_EQ(1u, fence->partitionCount);
890     ze_fence_handle_t fenceHandle = fence->toHandle();
891 
892     //1st execute call initialized pipeline
893     auto usedSpaceBefore = commandQueue->commandStream->getUsed();
894     auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true);
895     EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
896     auto usedSpaceAfter = commandQueue->commandStream->getUsed();
897 
898     //1st call then initialize registers
899     GenCmdList cmdList;
900     ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore), usedSpaceAfter));
901     findPartitionRegister<FamilyType>(cmdList, true);
902 
903     usedSpaceBefore = commandQueue->commandStream->getUsed();
904     result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true);
905     ASSERT_EQ(ZE_RESULT_SUCCESS, result);
906     usedSpaceAfter = commandQueue->commandStream->getUsed();
907     ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
908     size_t cmdBufferSizeWithoutMmioProgramming = usedSpaceAfter - usedSpaceBefore;
909     EXPECT_EQ(2u, fence->partitionCount);
910 
911     for (auto i = 0u; i < numCommandLists; i++) {
912         auto commandList = CommandList::fromHandle(commandLists[i]);
913         commandList->partitionCount = 2;
914     }
915 
916     cmdList.clear();
917     ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore), usedSpaceAfter));
918     findPartitionRegister<FamilyType>(cmdList, false);
919 
920     usedSpaceBefore = commandQueue->commandStream->getUsed();
921     result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true);
922     ASSERT_EQ(ZE_RESULT_SUCCESS, result);
923     usedSpaceAfter = commandQueue->commandStream->getUsed();
924     ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
925     size_t cmdBufferSizeWithtMmioProgramming = usedSpaceAfter - usedSpaceBefore;
926     EXPECT_EQ(2u, fence->partitionCount);
927 
928     size_t expectedSizeWithMmioProgramming = cmdBufferSizeWithoutMmioProgramming;
929     EXPECT_GE(expectedSizeWithMmioProgramming, cmdBufferSizeWithtMmioProgramming);
930 
931     cmdList.clear();
932     ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore), usedSpaceAfter));
933     findPartitionRegister<FamilyType>(cmdList, false);
934 
935     auto pipeControlList = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
936 
937     uint32_t foundPostSyncPipeControl = 0u;
938     for (size_t i = 0; i < pipeControlList.size(); i++) {
939         auto pipeControl = reinterpret_cast<PIPE_CONTROL *>(*pipeControlList[i]);
940         if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
941             EXPECT_TRUE(pipeControl->getWorkloadPartitionIdOffsetEnable());
942             foundPostSyncPipeControl++;
943         }
944     }
945     EXPECT_EQ(2u, foundPostSyncPipeControl);
946 
947     fence->destroy();
948     commandQueue->destroy();
949 }
950 
951 } // namespace ult
952 } // namespace L0
953