/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDLocalMemoryTest.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "SDMAPacket.hpp"
#include "SDMAQueue.hpp"
#include "Dispatch.hpp"

// All tests are marked by their serial number in the QCM FDD

void KFDLocalMemoryTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    m_pIsaGen = IsaGenerator::Create(m_FamilyId);

    ROUTINE_END
}

void KFDLocalMemoryTest::TearDown() {
    ROUTINE_START

    if (m_pIsaGen)
        delete m_pIsaGen;
    m_pIsaGen = NULL;

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

TEST_F(KFDLocalMemoryTest, BasicTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    PM4Queue queue;
    HSAuint64 AlternateVAGPU;
    unsigned int BufferSize = PAGE_SIZE;

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (!GetVramSize(defaultGPUNode)) {
        LOG() << "No VRAM found, skipping the test" << std::endl;
        return;
    }

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode);
    HsaMemoryBuffer srcSysBuffer(BufferSize, defaultGPUNode, false);
    HsaMemoryBuffer destSysBuffer(BufferSize, defaultGPUNode);
    HsaMemoryBuffer srcLocalBuffer(BufferSize, defaultGPUNode, false, true);
    HsaMemoryBuffer dstLocalBuffer(BufferSize, defaultGPUNode, false, true);

    srcSysBuffer.Fill(0x01010101);

    m_pIsaGen->GetCopyDwordIsa(isaBuffer);

    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(srcLocalBuffer.As<void*>(), srcLocalBuffer.Size(), &AlternateVAGPU));
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(dstLocalBuffer.As<void*>(), dstLocalBuffer.Size(), &AlternateVAGPU));

    ASSERT_SUCCESS(queue.Create(defaultGPUNode));
    queue.SetSkipWaitConsump(0);

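    /* Copy chain: system -> local (src) -> local (dst) -> system.
     * Each hop reuses the copy-dword shader, so the final compare against
     * the fill pattern exercises both writing to and reading from VRAM. */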
    Dispatch dispatch(isaBuffer);

    dispatch.SetArgs(srcSysBuffer.As<void*>(), srcLocalBuffer.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);

    dispatch.SetArgs(srcLocalBuffer.As<void*>(), dstLocalBuffer.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);

    dispatch.SetArgs(dstLocalBuffer.As<void*>(), destSysBuffer.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);

    ASSERT_SUCCESS(queue.Destroy());

    ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU(srcLocalBuffer.As<void*>()));
    ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU(dstLocalBuffer.As<void*>()));
    ASSERT_EQ(destSysBuffer.As<unsigned int*>()[0], 0x01010101);

    TEST_END
}

TEST_F(KFDLocalMemoryTest, VerifyContentsAfterUnmapAndMap) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    PM4Queue queue;
    HSAuint64 AlternateVAGPU;
    unsigned int BufferSize = PAGE_SIZE;

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (!GetVramSize(defaultGPUNode)) {
        LOG() << "No VRAM found, skipping the test" << std::endl;
        return;
    }

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode);
    HsaMemoryBuffer SysBufferA(BufferSize, defaultGPUNode, false);
    HsaMemoryBuffer SysBufferB(BufferSize, defaultGPUNode, true);
    HsaMemoryBuffer LocalBuffer(BufferSize, defaultGPUNode, true, true);

    SysBufferA.Fill(0x01010101);

    m_pIsaGen->GetCopyDwordIsa(isaBuffer);

    ASSERT_SUCCESS(queue.Create(defaultGPUNode));
    queue.SetSkipWaitConsump(0);

    if (!is_dgpu())
        ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(LocalBuffer.As<void*>(), LocalBuffer.Size(), &AlternateVAGPU));

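    /* Write the pattern into LocalBuffer, unmap and remap it, then copy it
     * back to SysBufferB: the contents of VRAM must survive the unmap/map
     * cycle. */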
    Dispatch dispatch(isaBuffer);

    dispatch.SetArgs(SysBufferA.As<void*>(), LocalBuffer.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);

    ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU(LocalBuffer.As<void*>()));
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(LocalBuffer.As<void*>(), LocalBuffer.Size(), &AlternateVAGPU));

    dispatch.SetArgs(LocalBuffer.As<void*>(), SysBufferB.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);

    ASSERT_SUCCESS(queue.Destroy());
    ASSERT_EQ(SysBufferB.As<unsigned int*>()[0], 0x01010101);
    if (!is_dgpu())
        ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU(LocalBuffer.As<void*>()));

    TEST_END
}

/* Deliberately fragment GPUVM aperture to fill up address space
 *
 * General idea: Allocate buffers, but don't map them to GPU. This
 * will reserve virtual address space without pinning physical
 * memory. It should allow using more address space than physically
 * available memory.
 *
 * Even without pinning memory, TTM will still commit memory at
 * allocation time and swap out movable buffers to system memory or
 * even the hard drive, if it needs to. So we can't allocate arbitrary
 * amounts of virtual memory.
 *
 * Strategy to maximize the amount of allocated, fragmented address
 * space while keeping the amount of committed memory bounded at all
 * times:
 *
 * 1. Allocate N blocks of a given size, initially 1 page
 * 2. Free every other block, creating holes in the address space.
 *    This frees up half the memory
 * 3. Allocate N/4 blocks of 2-pages each. This requires as much
 *    memory as was freed in step 2. The block size is bigger than
 *    the 1-page holes, so new address space will be used.
 * 4. Free half the blocks just allocated, and half of the
 *    remaining blocks of step 1. This creates 3-page holes between
 *    the 1-page blocks from step 1, and 2-page holes between the
 *    2-page blocks from step 3. It frees up half of the total
 *    memory.
 * 5. Double the block size to 4, divide the number of blocks by 2.
 *    Again, this will require the amount of memory freed in step 4.
 *    The block size 4 is bigger than the biggest hole (3 pages).
 * 6. Free half the memory again, creating 7-page holes between
 *    1-page blocks, 6-page holes between 2-page blocks, and 4-page
 *    holes between 4-page blocks.
 *
 * Repeat, doubling block size and halving number of blocks in each
 * iteration. Each iteration starts and ends with half the total
 * memory free. Because the block size is always bigger than the
 * biggest hole, each iteration increases the amount of address space
 * occupied by half the total memory size. Once the block size reaches
 * half of the free memory (1/4 of total memory) the limit is reached.
 *
 * With 2^n pages available memory, n * 2^(n-1) pages of address space
 * can be reserved. At the end of that process, half the memory will
 * be free.
 *
 *     Total memory     | Fragmented address space
 * order | pages | size | pages |  size | ratio
 * ------+-------+------+-------+-------+-------
 *     2 |    4  |  16K |    4  |   16K |   1
 *     3 |    8  |  32K |   12  |   48K |   1.5
 *     4 |   16  |  64K |   32  |  128K |   2
 *     5 |   32  | 128K |   80  |  320K |   2.5
 *     6 |   64  | 256K |  192  |  768K |   3
 *     7 |  128  | 512K |  448  | 1.75M |   3.5
 *     8 |  256  |   1M |    1K |    4M |   4
 *     9 |  512  |   2M | 2.25K |    9M |   4.5
 *    10 |    1K |   4M |    5K |   20M |   5
 *    11 |    2K |   8M |   11K |   44M |   5.5
 *    12 |    4K |  16M |   24K |   96M |   6
 *    13 |    8K |  32M |   52K |  208M |   6.5
 *    14 |   16K |  64M |  112K |  448M |   7
 *    15 |   32K | 128M |  240K |  960M |   7.5
 *    16 |   64K | 256M |  512K |    2G |   8
 *    17 |  128K | 512M | 1088K | 4.25G |   8.5
 *    18 |  256K |   1G | 2.25M |    9G |   9
 *    19 |  512K |   2G | 4.75M |   19G |   9.5
 *    20 |    1M |   4G |   10M |   40G |  10
 */
TEST_F(KFDLocalMemoryTest, Fragmentation) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    HSAuint64 fbSize;

    fbSize = GetVramSize(defaultGPUNode);

    if (!fbSize) {
        LOG() << "No VRAM found, skipping test." << std::endl;
        return;
    } else {
        LOG() << "Found VRAM of " << std::dec << (fbSize >> 20) << "MB." << std::endl;
    }

    /* Use up to half of available memory. Using more results in
     * excessive memory movement in TTM and slows down the test too
     * much. maxOrder is the order (log2 of the size in pages) of the
     * biggest block that will be allocated. That block is 1/4 of the
     * usable memory, so 1/8 of the total FB size in pages.
     *
     * Use 8x bigger page size on dGPU to match Tonga alignment
     * workaround. Also nicely matches the 8x bigger GPUVM address
     * space on AMDGPU compared to RADEON.
     */
    unsigned pageSize = is_dgpu() ? PAGE_SIZE*8 : PAGE_SIZE;
    fbSize /= pageSize;
    unsigned maxOrder = 0;
    // Limit maxOrder up to 14 so this test doesn't run longer than 10 mins
    while (((fbSize >> maxOrder) >= 16) && (maxOrder < 14))
        maxOrder++;

    /* Queue and memory used by the shader copy tests */
    HsaMemoryBuffer sysBuffer(PAGE_SIZE, defaultGPUNode, false);
    PM4Queue queue;
    ASSERT_SUCCESS(queue.Create(defaultGPUNode));
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode);
    m_pIsaGen->GetCopyDwordIsa(isaBuffer);

    /* Allocate and test memory using the strategy explained at the top */
    HSAKMT_STATUS status;
    HsaMemFlags memFlags = {0};
    memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    memFlags.ui32.HostAccess = 0;
    memFlags.ui32.NonPaged = 1;
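    /* pages[order] tracks the block pointers and the number of blocks of
     * 2^order pages that are currently allocated at that order. */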
    struct {
        void **pointers;
        unsigned long nPages;
    } pages[maxOrder+1];
    unsigned order, o;
    unsigned long p;
    HSAuint64 size;
    unsigned value = 0;
    memset(pages, 0, sizeof(pages));
    for (order = 0; order <= maxOrder; order++) {
        // At maxOrder, the block size is 1/4 of available memory
        pages[order].nPages = 1UL << (maxOrder - order + 2);
        // At order != 0, 1/2 the memory is already allocated
        if (order > 0)
            pages[order].nPages >>= 1;
        // Allocate page pointers
        pages[order].pointers = new void *[pages[order].nPages];
        EXPECT_NE((void **)NULL, pages[order].pointers)
            << "Couldn't allocate memory for " << pages[order].nPages
            << " pointers at order " << order << std::endl;
        if (!pages[order].pointers) {
            pages[order].nPages = 0;
            break;
        }
        /* Allocate buffers and access the start and end of every one:
         * 1. Copy from sysBuffer[0] to start of block
         * 2. Copy from start of block to end of block
         * 3. Copy from end of block to sysBuffer[1]
         * 4. Compare results */
        size = (HSAuint64)(1 << order) * pageSize;
        LOG() << std::dec << "Trying to allocate " << pages[order].nPages
              << " order " << order << " blocks " << std::endl;
        for (p = 0; p < pages[order].nPages; p++) {
            status = hsaKmtAllocMemory(defaultGPUNode, size,
                                       memFlags, &pages[order].pointers[p]);
            if (status != HSAKMT_STATUS_SUCCESS) {
                EXPECT_EQ(HSAKMT_STATUS_NO_MEMORY, status);
                pages[order].nPages = p;
                break;
            }

            void *bufferEnd = (void *)((unsigned long)pages[order].pointers[p]
                                       + size - sizeof(unsigned));
            sysBuffer.As<unsigned *>()[0] = ++value;

            status = hsaKmtMapMemoryToGPU(pages[order].pointers[p],
                                          size, NULL);
            if (status != HSAKMT_STATUS_SUCCESS) {
                ASSERT_SUCCESS(hsaKmtFreeMemory(pages[order].pointers[p],
                                                size));
                pages[order].nPages = p;
                break;
            }
            Dispatch dispatch1(isaBuffer);
            dispatch1.SetArgs(sysBuffer.As<void*>(), pages[order].pointers[p]);
            dispatch1.Submit(queue);
            // no sync needed for multiple GPU dispatches to the same queue

            Dispatch dispatch2(isaBuffer);
            dispatch2.SetArgs(pages[order].pointers[p], bufferEnd);
            dispatch2.Submit(queue);
            // no sync needed for multiple GPU dispatches to the same queue

            Dispatch dispatch3(isaBuffer);
            dispatch3.SetArgs(bufferEnd,
                              (void *)&(sysBuffer.As<unsigned*>()[1]));
            dispatch3.Submit(queue);
            dispatch3.Sync(g_TestTimeOut);
            EXPECT_EQ(value, sysBuffer.As<unsigned *>()[1]);

            EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(pages[order].pointers[p]));
        }
        LOG() << "  Got " << pages[order].nPages
              << ", end of last block addr: "
              << (void *)((unsigned long)pages[order].pointers[p-1] + size - 1)
              << std::endl;

        // Now free half the memory
        for (o = 0; o <= order; o++) {
            unsigned long step = 1UL << (order - o + 1);
            unsigned long offset = (step >> 1) - 1;
            size = (HSAuint64)(1 << o) * pageSize;
            LOG() << "  Freeing every " << step << "th order "
                  << o << " block starting with " << offset << std::endl;
            for (p = offset; p < pages[o].nPages; p += step) {
                ASSERT_NE((void **)NULL, pages[o].pointers[p]);
                EXPECT_SUCCESS(hsaKmtFreeMemory(pages[o].pointers[p], size));
                pages[o].pointers[p] = NULL;
            }
        }
    }

    /* Clean up */
    for (order = 0; order <= maxOrder; order++) {
        if (pages[order].pointers == NULL)
            continue;

        size = (HSAuint64)(1 << order) * pageSize;
        for (p = 0; p < pages[order].nPages; p++)
            if (pages[order].pointers[p] != NULL)
                EXPECT_SUCCESS(hsaKmtFreeMemory(pages[order].pointers[p], size));

        delete[] pages[order].pointers;
    }

    ASSERT_SUCCESS(queue.Destroy());

    TEST_END
}

TEST_F(KFDLocalMemoryTest, CheckZeroInitializationVram) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    /* Testing VRAM */
    HSAuint64 vramSizeMB = GetVramSize(defaultGPUNode) >> 20;

    if (!vramSizeMB) {
        LOG() << "No VRAM found, skipping the test" << std::endl;
        return;
    }

    HSAuint64 vramBufSizeMB = vramSizeMB >> 2;
    /* Limit the buffer size so as not to overflow the SDMA queue buffer. */
    if (vramBufSizeMB > 1024) {
        vramBufSizeMB = 1024;
    }
    HSAuint64 vramBufSize = vramBufSizeMB * 1024 * 1024;

    /* Make sure the entire VRAM is used at least once */
    int count = (vramSizeMB + vramBufSizeMB - 1) / vramBufSizeMB + 1;

    LOG() << "Using " << std::dec << vramBufSizeMB
            << "MB VRAM buffer to test " << std::dec << count
            << " times" << std::endl;

    SDMAQueue sdmaQueue;
    ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode, 8 * PAGE_SIZE));

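    /* tmpBuffer provides a host-visible scratch dword (tmp) that the
     * SDMA-based IsPattern() checks below use when reading back values
     * from the non-host-accessible local buffer. */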
    HsaMemoryBuffer tmpBuffer(PAGE_SIZE, 0, true /* zero */);
    volatile HSAuint32 *tmp = tmpBuffer.As<volatile HSAuint32 *>();

    unsigned int offset = 2060;  // a constant offset; should be 4-byte aligned

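    /* Each newly allocated VRAM buffer should be zero-initialized by the
     * kernel. Check the first dword, a sample of dwords at page-sized
     * strides, and the last dword, then dirty the buffer with a non-zero
     * pattern so that a physical page recycled into a later allocation
     * without being cleared would be caught. */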
    while (count--) {
        HsaMemoryBuffer localBuffer(vramBufSize, defaultGPUNode, false, true);

        EXPECT_TRUE(localBuffer.IsPattern(0, 0, sdmaQueue, tmp));

        for (HSAuint64 i = offset; i < vramBufSize;) {
            EXPECT_TRUE(localBuffer.IsPattern(i, 0, sdmaQueue, tmp));
            i += 4096;
        }

        /* Checking last 4 bytes */
        EXPECT_TRUE(localBuffer.IsPattern(vramBufSize - 4, 0, sdmaQueue, tmp));

        localBuffer.Fill(0xABCDEFFF, sdmaQueue);
    }

    TEST_END
}

TEST_F(KFDLocalMemoryTest, MapVramToGPUNodesTest) {
    TEST_START(TESTPROFILE_RUNALL);

    HSAint32 src_node;
    HSAint32 dst_node;
    HsaPointerInfo info;

    const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
    if (gpuNodes.size() < 2) {
        LOG() << "Skipping test: Need at least two GPUs" << std::endl;
        return;
    }

    if (g_TestDstNodeId != -1 && g_TestNodeId != -1) {
        src_node = g_TestNodeId;
        dst_node = g_TestDstNodeId;
    } else {
        int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();

        dst_node = m_NodeInfo.FindLargeBarGPUNode();
        if (dst_node < 0) {
            LOG() << "Skipping test: Need at least one large bar GPU" << std::endl;
            return;
        }

        if (dst_node != defaultGPUNode) {
            /* at least one node should be defaultGPUNode */
            src_node = defaultGPUNode;
        } else {
            for (auto node : gpuNodes) {
                if (node != dst_node) {
                    src_node = node;
                    break;
                }
            }
        }
    }

    LOG() << "Testing from GPU " << src_node << " to GPU " << dst_node << std::endl;

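    /* Allocate a page of VRAM on the destination node, register it to both
     * nodes, then map it to both nodes, to each node individually, and
     * unmap it, checking after every step that NRegisteredNodes and
     * NMappedNodes reported by hsaKmtQueryPointerInfo match the expected
     * mapping state. */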
    void *shared_addr;
    HSAuint32 nodes[] = { (HSAuint32)src_node, (HSAuint32)dst_node };
    HsaMemFlags memFlags = {0};
    memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    memFlags.ui32.HostAccess = 1;
    memFlags.ui32.NonPaged = 1;
    memFlags.ui32.ExecuteAccess = 1;

    HsaMemMapFlags mapFlags = {0};

    EXPECT_SUCCESS(hsaKmtAllocMemory(nodes[1], PAGE_SIZE, memFlags, &shared_addr));
    EXPECT_SUCCESS(hsaKmtRegisterMemoryToNodes(shared_addr, PAGE_SIZE, 2, nodes));
    EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(shared_addr, PAGE_SIZE, NULL, mapFlags, 2, nodes));
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(shared_addr, &info));
    EXPECT_EQ(info.NRegisteredNodes, 2);
    EXPECT_EQ(info.NMappedNodes, 2);

    EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(shared_addr, PAGE_SIZE, NULL, mapFlags, 1, &nodes[0]));
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(shared_addr, &info));
    EXPECT_EQ(info.NRegisteredNodes, 2);
    EXPECT_EQ(info.NMappedNodes, 1);
    EXPECT_EQ(info.MappedNodes[0], nodes[0]);

    EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(shared_addr, PAGE_SIZE, NULL, mapFlags, 1, &nodes[1]));
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(shared_addr, &info));
    EXPECT_EQ(info.NRegisteredNodes, 2);
    EXPECT_EQ(info.NMappedNodes, 1);
    EXPECT_EQ(info.MappedNodes[0], nodes[1]);

    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(shared_addr));
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(shared_addr, &info));
    EXPECT_EQ(info.NRegisteredNodes, 2);
    EXPECT_EQ(info.NMappedNodes, 0);

    EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(shared_addr, PAGE_SIZE, NULL, mapFlags, 1, &nodes[0]));
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(shared_addr, &info));
    EXPECT_EQ(info.NRegisteredNodes, 2);
    EXPECT_EQ(info.NMappedNodes, 1);
    EXPECT_EQ(info.MappedNodes[0], nodes[0]);

    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(shared_addr));
    EXPECT_SUCCESS(hsaKmtFreeMemory(shared_addr, PAGE_SIZE));

    TEST_END
}