/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDLocalMemoryTest.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "SDMAPacket.hpp"
#include "SDMAQueue.hpp"
#include "Dispatch.hpp"

// All tests are marked by their serial number in the QCM FDD

void KFDLocalMemoryTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    m_pIsaGen = IsaGenerator::Create(m_FamilyId);

    ROUTINE_END
}

void KFDLocalMemoryTest::TearDown() {
    ROUTINE_START

    if (m_pIsaGen)
        delete m_pIsaGen;
    m_pIsaGen = NULL;

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

TEST_F(KFDLocalMemoryTest, BasicTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    PM4Queue queue;
    HSAuint64 AlternateVAGPU;
    unsigned int BufferSize = PAGE_SIZE;

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (!GetVramSize(defaultGPUNode)) {
        LOG() << "No VRAM found, skipping the test" << std::endl;
        return;
    }

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode);
    HsaMemoryBuffer srcSysBuffer(BufferSize, defaultGPUNode, false);
    HsaMemoryBuffer destSysBuffer(BufferSize, defaultGPUNode);
    HsaMemoryBuffer srcLocalBuffer(BufferSize, defaultGPUNode, false, true);
    HsaMemoryBuffer dstLocalBuffer(BufferSize, defaultGPUNode, false, true);

    srcSysBuffer.Fill(0x01010101);

    m_pIsaGen->GetCopyDwordIsa(isaBuffer);

    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(srcLocalBuffer.As<void*>(), srcLocalBuffer.Size(), &AlternateVAGPU));
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(dstLocalBuffer.As<void*>(), dstLocalBuffer.Size(), &AlternateVAGPU));

    ASSERT_SUCCESS(queue.Create(defaultGPUNode));
    queue.SetSkipWaitConsump(0);

    Dispatch dispatch(isaBuffer);

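    // Chain three one-dword copies through the copy shader:
    // system -> local, local -> local, then local -> system.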
    dispatch.SetArgs(srcSysBuffer.As<void*>(), srcLocalBuffer.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);

    dispatch.SetArgs(srcLocalBuffer.As<void*>(), dstLocalBuffer.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);

    dispatch.SetArgs(dstLocalBuffer.As<void*>(), destSysBuffer.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);

    ASSERT_SUCCESS(queue.Destroy());

    ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU(srcLocalBuffer.As<void*>()));
    ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU(dstLocalBuffer.As<void*>()));
    ASSERT_EQ(destSysBuffer.As<unsigned int*>()[0], 0x01010101);

    TEST_END
}

TEST_F(KFDLocalMemoryTest, VerifyContentsAfterUnmapAndMap) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    PM4Queue queue;
    HSAuint64 AlternateVAGPU;
    unsigned int BufferSize = PAGE_SIZE;

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (!GetVramSize(defaultGPUNode)) {
        LOG() << "No VRAM found, skipping the test" << std::endl;
        return;
    }

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode);
    HsaMemoryBuffer SysBufferA(BufferSize, defaultGPUNode, false);
    HsaMemoryBuffer SysBufferB(BufferSize, defaultGPUNode, true);
    HsaMemoryBuffer LocalBuffer(BufferSize, defaultGPUNode, true, true);

    SysBufferA.Fill(0x01010101);

    m_pIsaGen->GetCopyDwordIsa(isaBuffer);

    ASSERT_SUCCESS(queue.Create(defaultGPUNode));
    queue.SetSkipWaitConsump(0);

    if (!is_dgpu())
        ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(LocalBuffer.As<void*>(), LocalBuffer.Size(), &AlternateVAGPU));

    Dispatch dispatch(isaBuffer);

    dispatch.SetArgs(SysBufferA.As<void*>(), LocalBuffer.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);

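    // Unmap and immediately remap the local buffer; its contents must
    // survive the round trip.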
    ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU(LocalBuffer.As<void*>()));
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(LocalBuffer.As<void*>(), LocalBuffer.Size(), &AlternateVAGPU));

    dispatch.SetArgs(LocalBuffer.As<void*>(), SysBufferB.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);

    ASSERT_SUCCESS(queue.Destroy());
    ASSERT_EQ(SysBufferB.As<unsigned int*>()[0], 0x01010101);
    if (!is_dgpu())
        ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU(LocalBuffer.As<void*>()));

    TEST_END
}

/* Deliberately fragment the GPUVM aperture to fill up address space
 *
 * General idea: Allocate buffers, but don't map them to the GPU. This
 * will reserve virtual address space without pinning physical
 * memory. It should allow using more address space than physically
 * available memory.
 *
 * Even without pinning memory, TTM will still commit memory at
 * allocation time and swap out movable buffers to system memory or
 * even the hard drive, if it needs to. So we can't allocate arbitrary
 * amounts of virtual memory.
 *
 * Strategy to maximize the amount of allocated, fragmented address
 * space while keeping the amount of committed memory bounded at all
 * times:
 *
 * 1. Allocate N blocks of a given size, initially 1 page
 * 2. Free every other block, creating holes in the address space.
 *    This frees up half the memory
 * 3. Allocate N/4 blocks of 2 pages each. This requires as much
 *    memory as was freed in step 2. The block size is bigger than
 *    the 1-page holes, so new address space will be used.
 * 4. Free half the blocks just allocated, and half of the
 *    remaining blocks of step 1. This creates 3-page holes between
 *    the 1-page blocks from step 1, and 2-page holes between the
 *    2-page blocks from step 3. It frees up half of the total
 *    memory.
 * 5. Double the block size to 4 pages and divide the number of
 *    blocks by 2. Again, this will require the amount of memory
 *    freed in step 4. The block size of 4 pages is bigger than the
 *    biggest hole (3 pages).
 * 6. Free half the memory again, creating 7-page holes between
 *    1-page blocks, 6-page holes between 2-page blocks, and 4-page
 *    holes between 4-page blocks.
 *
 * Repeat, doubling the block size and halving the number of blocks
 * in each iteration. Each iteration starts and ends with half the
 * total memory free. Because the block size is always bigger than the
 * biggest hole, each iteration increases the amount of address space
 * occupied by half the total memory size. Once the block size reaches
 * half of the free memory (1/4 of total memory) the limit is reached.
 *
 * With 2^n pages of available memory, n * 2^(n-1) pages of address
 * space can be reserved. At the end of that process, half the memory
 * will be free.
 *
 *  Total memory        | Fragmented address space
 *  order | pages | size | pages | size  | ratio
 *  ------+-------+------+-------+-------+-------
 *      2 |     4 |  16K |     4 |   16K | 1
 *      3 |     8 |  32K |    12 |   48K | 1.5
 *      4 |    16 |  64K |    32 |  128K | 2
 *      5 |    32 | 128K |    80 |  320K | 2.5
 *      6 |    64 | 256K |   192 |  768K | 3
 *      7 |   128 | 512K |   448 | 1.75M | 3.5
 *      8 |   256 |   1M |    1K |    4M | 4
 *      9 |   512 |   2M | 2.25K |    9M | 4.5
 *     10 |    1K |   4M |    5K |   20M | 5
 *     11 |    2K |   8M |   11K |   44M | 5.5
 *     12 |    4K |  16M |   24K |   96M | 6
 *     13 |    8K |  32M |   52K |  208M | 6.5
 *     14 |   16K |  64M |  112K |  448M | 7
 *     15 |   32K | 128M |  240K |  960M | 7.5
 *     16 |   64K | 256M |  512K |    2G | 8
 *     17 |  128K | 512M | 1088K | 4.25G | 8.5
 *     18 |  256K |   1G | 2.25M |    9G | 9
 *     19 |  512K |   2G | 4.75M |   19G | 9.5
 *     20 |    1M |   4G |   10M |   40G | 10
 */
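/* Sanity check of the formula against one table row (order 8):
 * total memory 2^8 = 256 pages (1M); fragmented address space
 * 8 * 2^(8-1) = 1024 pages = 1K pages (4M); ratio 4M / 1M = 4. */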
TEST_F(KFDLocalMemoryTest, Fragmentation) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    HSAuint64 fbSize;

    fbSize = GetVramSize(defaultGPUNode);

    if (!fbSize) {
        LOG() << "No VRAM found, skipping test." << std::endl;
        return;
    } else {
        LOG() << "Found VRAM of " << std::dec << (fbSize >> 20) << "MB." << std::endl;
    }

    /* Use up to half of the available memory. Using more results in
     * excessive memory movement in TTM and slows down the test too
     * much. maxOrder is the size of the biggest block that will be
     * allocated. It's 1/4 of the usable memory, so 1/8 the total FB
     * size in pages.
     *
     * Use an 8x bigger page size on dGPU to match the Tonga alignment
     * workaround. This also nicely matches the 8x bigger GPUVM address
     * space on AMDGPU compared to RADEON.
     */
    unsigned pageSize = is_dgpu() ? PAGE_SIZE*8 : PAGE_SIZE;
    fbSize /= pageSize;
    unsigned maxOrder = 0;
    // Limit maxOrder to 14 so this test doesn't run longer than 10 mins
    while (((fbSize >> maxOrder) >= 16) && (maxOrder < 14))
        maxOrder++;
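    /* For example, a hypothetical dGPU with 4GB of VRAM (4KB base pages,
     * so a 32KB effective page size) gives fbSize = 128K pages; the loop
     * stops at maxOrder = 14, making the biggest block 2^14 pages = 512MB,
     * i.e. 1/8 of the frame buffer, as described above. */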

    /* Queue and memory used by the shader copy tests */
    HsaMemoryBuffer sysBuffer(PAGE_SIZE, defaultGPUNode, false);
    PM4Queue queue;
    ASSERT_SUCCESS(queue.Create(defaultGPUNode));
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode);
    m_pIsaGen->GetCopyDwordIsa(isaBuffer);

    /* Allocate and test memory using the strategy explained at the top */
    HSAKMT_STATUS status;
    HsaMemFlags memFlags = {0};
    memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    memFlags.ui32.HostAccess = 0;
    memFlags.ui32.NonPaged = 1;
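    // NonPaged = 1 selects device-local (VRAM) memory; HostAccess = 0
    // means the CPU never needs a mapping of it.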
    struct {
        void **pointers;
        unsigned long nPages;
    } pages[maxOrder+1];
    unsigned order, o;
    unsigned long p;
    HSAuint64 size;
    unsigned value = 0;
    memset(pages, 0, sizeof(pages));
    for (order = 0; order <= maxOrder; order++) {
        // At maxOrder, the block size is 1/4 of available memory
        pages[order].nPages = 1UL << (maxOrder - order + 2);
        // At order != 0, 1/2 the memory is already allocated
        if (order > 0)
            pages[order].nPages >>= 1;
        // Allocate page pointers
        pages[order].pointers = new void *[pages[order].nPages];
        EXPECT_NE((void **)NULL, pages[order].pointers)
            << "Couldn't allocate memory for " << pages[order].nPages
            << " pointers at order " << order << std::endl;
        if (!pages[order].pointers) {
            pages[order].nPages = 0;
            break;
        }
        /* Allocate buffers and access the start and end of every one:
         * 1. Copy from sysBuffer[0] to start of block
         * 2. Copy from start of block to end of block
         * 3. Copy from end of block to sysBuffer[1]
         * 4. Compare results */
        size = (HSAuint64)(1 << order) * pageSize;
        LOG() << std::dec << "Trying to allocate " << pages[order].nPages
              << " order " << order << " blocks " << std::endl;
        for (p = 0; p < pages[order].nPages; p++) {
            status = hsaKmtAllocMemory(defaultGPUNode, size,
                                       memFlags, &pages[order].pointers[p]);
            if (status != HSAKMT_STATUS_SUCCESS) {
                EXPECT_EQ(HSAKMT_STATUS_NO_MEMORY, status);
                pages[order].nPages = p;
                break;
            }

            void *bufferEnd = (void *)((unsigned long)pages[order].pointers[p]
                                       + size - sizeof(unsigned));
            sysBuffer.As<unsigned *>()[0] = ++value;

            status = hsaKmtMapMemoryToGPU(pages[order].pointers[p],
                                          size, NULL);
            if (status != HSAKMT_STATUS_SUCCESS) {
                ASSERT_SUCCESS(hsaKmtFreeMemory(pages[order].pointers[p],
                                                size));
                pages[order].nPages = p;
                break;
            }
            Dispatch dispatch1(isaBuffer);
            dispatch1.SetArgs(sysBuffer.As<void*>(), pages[order].pointers[p]);
            dispatch1.Submit(queue);
            // no sync needed for multiple GPU dispatches to the same queue

            Dispatch dispatch2(isaBuffer);
            dispatch2.SetArgs(pages[order].pointers[p], bufferEnd);
            dispatch2.Submit(queue);
            // no sync needed for multiple GPU dispatches to the same queue

            Dispatch dispatch3(isaBuffer);
            dispatch3.SetArgs(bufferEnd,
                              (void *)&(sysBuffer.As<unsigned*>()[1]));
            dispatch3.Submit(queue);
            dispatch3.Sync(g_TestTimeOut);
            EXPECT_EQ(value, sysBuffer.As<unsigned *>()[1]);

            EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(pages[order].pointers[p]));
        }
        LOG() << " Got " << pages[order].nPages
              << ", end of last block addr: "
              << (void *)((unsigned long)pages[order].pointers[p-1] + size - 1)
              << std::endl;

        // Now free half the memory
        for (o = 0; o <= order; o++) {
            unsigned long step = 1UL << (order - o + 1);
            unsigned long offset = (step >> 1) - 1;
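            /* e.g. in the order == 1 pass: o=0 gives step=4, offset=1,
             * freeing order-0 blocks 1, 5, 9, ... (blocks 0, 2, 4, ...
             * went in the previous pass, leaving 3-page holes), while
             * o=1 gives step=2, offset=0, freeing every other new
             * 2-page block. */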
            size = (HSAuint64)(1 << o) * pageSize;
            LOG() << " Freeing every " << step << "th order "
                  << o << " block starting with " << offset << std::endl;
            for (p = offset; p < pages[o].nPages; p += step) {
                ASSERT_NE((void **)NULL, pages[o].pointers[p]);
                EXPECT_SUCCESS(hsaKmtFreeMemory(pages[o].pointers[p], size));
                pages[o].pointers[p] = NULL;
            }
        }
    }

    /* Clean up */
    for (order = 0; order <= maxOrder; order++) {
        if (pages[order].pointers == NULL)
            continue;

        size = (HSAuint64)(1 << order) * pageSize;
        for (p = 0; p < pages[order].nPages; p++)
            if (pages[order].pointers[p] != NULL)
                EXPECT_SUCCESS(hsaKmtFreeMemory(pages[order].pointers[p], size));

        delete[] pages[order].pointers;
    }

    ASSERT_SUCCESS(queue.Destroy());

    TEST_END
}

TEST_F(KFDLocalMemoryTest, CheckZeroInitializationVram) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    /* Testing VRAM */
    HSAuint64 vramSizeMB = GetVramSize(defaultGPUNode) >> 20;

    if (!vramSizeMB) {
        LOG() << "No VRAM found, skipping the test" << std::endl;
        return;
    }

    HSAuint64 vramBufSizeMB = vramSizeMB >> 2;
    /* Limit the buffer size so we don't overflow the SDMA queue buffer. */
    if (vramBufSizeMB > 1024) {
        vramBufSizeMB = 1024;
    }
    HSAuint64 vramBufSize = vramBufSizeMB * 1024 * 1024;

    /* Make sure the entire VRAM is used at least once */
    int count = (vramSizeMB + vramBufSizeMB - 1) / vramBufSizeMB + 1;
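    /* e.g. a hypothetical 4GB card: vramBufSizeMB = 1024, so
     * count = (4096 + 1023) / 1024 + 1 = 5 buffer allocations; the extra
     * pass helps cover memory recycled from earlier iterations. */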

    LOG() << "Using " << std::dec << vramBufSizeMB
          << "MB VRAM buffer to test " << std::dec << count
          << " times" << std::endl;

    SDMAQueue sdmaQueue;
    ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode, 8 * PAGE_SIZE));
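    // An 8-page ring, presumably so the long SDMA command streams for the
    // large-buffer checks below fit without overflowing the queue.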

    HsaMemoryBuffer tmpBuffer(PAGE_SIZE, 0, true /* zero */);
    volatile HSAuint32 *tmp = tmpBuffer.As<volatile HSAuint32 *>();

    unsigned int offset = 2060;  // an arbitrary constant offset; must be 4-byte aligned

    while (count--) {
        HsaMemoryBuffer localBuffer(vramBufSize, defaultGPUNode, false, true);

        EXPECT_TRUE(localBuffer.IsPattern(0, 0, sdmaQueue, tmp));

        for (HSAuint64 i = offset; i < vramBufSize;) {
            EXPECT_TRUE(localBuffer.IsPattern(i, 0, sdmaQueue, tmp));
            i += 4096;
        }

        /* Check the last 4 bytes */
        EXPECT_TRUE(localBuffer.IsPattern(vramBufSize - 4, 0, sdmaQueue, tmp));

        localBuffer.Fill(0xABCDEFFF, sdmaQueue);
    }

    TEST_END
}

TEST_F(KFDLocalMemoryTest, MapVramToGPUNodesTest) {
    TEST_START(TESTPROFILE_RUNALL);

    HSAint32 src_node;
    HSAint32 dst_node;
    HsaPointerInfo info;

    const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
    if (gpuNodes.size() < 2) {
        LOG() << "Skipping test: Need at least two GPUs" << std::endl;
        return;
    }

    if (g_TestDstNodeId != -1 && g_TestNodeId != -1) {
        src_node = g_TestNodeId;
        dst_node = g_TestDstNodeId;
    } else {
        int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();

        dst_node = m_NodeInfo.FindLargeBarGPUNode();
        if (dst_node < 0) {
            LOG() << "Skipping test: Need at least one large-BAR GPU" << std::endl;
            return;
        }

        if (dst_node != defaultGPUNode) {
            /* At least one of the two nodes should be defaultGPUNode */
            src_node = defaultGPUNode;
        } else {
            for (auto node : gpuNodes) {
                if (node != dst_node) {
                    src_node = node;
                    break;
                }
            }
        }
    }

    LOG() << "Testing from GPU " << src_node << " to GPU " << dst_node << std::endl;

    void *shared_addr;
    HSAuint32 nodes[] = { (HSAuint32)src_node, (HSAuint32)dst_node };
    HsaMemFlags memFlags = {0};
    memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    memFlags.ui32.HostAccess = 1;
    memFlags.ui32.NonPaged = 1;
    memFlags.ui32.ExecuteAccess = 1;
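    // Host-accessible VRAM: the reason a large-BAR GPU is required above.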

    HsaMemMapFlags mapFlags = {0};

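    /* Walk the map/unmap state machine: map to both nodes, then to each
     * node individually (a new map call replaces the previous mapping
     * set), then unmap completely and map again. The pointer info should
     * track the mapped-node set at every step. */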
    EXPECT_SUCCESS(hsaKmtAllocMemory(nodes[1], PAGE_SIZE, memFlags, &shared_addr));
    EXPECT_SUCCESS(hsaKmtRegisterMemoryToNodes(shared_addr, PAGE_SIZE, 2, nodes));
    EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(shared_addr, PAGE_SIZE, NULL, mapFlags, 2, nodes));
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(shared_addr, &info));
    EXPECT_EQ(info.NRegisteredNodes, 2);
    EXPECT_EQ(info.NMappedNodes, 2);

    EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(shared_addr, PAGE_SIZE, NULL, mapFlags, 1, &nodes[0]));
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(shared_addr, &info));
    EXPECT_EQ(info.NRegisteredNodes, 2);
    EXPECT_EQ(info.NMappedNodes, 1);
    EXPECT_EQ(info.MappedNodes[0], nodes[0]);

    EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(shared_addr, PAGE_SIZE, NULL, mapFlags, 1, &nodes[1]));
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(shared_addr, &info));
    EXPECT_EQ(info.NRegisteredNodes, 2);
    EXPECT_EQ(info.NMappedNodes, 1);
    EXPECT_EQ(info.MappedNodes[0], nodes[1]);

    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(shared_addr));
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(shared_addr, &info));
    EXPECT_EQ(info.NRegisteredNodes, 2);
    EXPECT_EQ(info.NMappedNodes, 0);

    EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(shared_addr, PAGE_SIZE, NULL, mapFlags, 1, &nodes[0]));
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(shared_addr, &info));
    EXPECT_EQ(info.NRegisteredNodes, 2);
    EXPECT_EQ(info.NMappedNodes, 1);
    EXPECT_EQ(info.MappedNodes[0], nodes[0]);

    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(shared_addr));
    EXPECT_SUCCESS(hsaKmtFreeMemory(shared_addr, PAGE_SIZE));

    TEST_END
}