/******************************************************************************
 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
 *
 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
 ******************************************************************************/

/******************************************************************************
 *
 * Memory management utilities
 *
 *****************************************************************************/

#include "_hypre_utilities.h"
#include "_hypre_utilities.hpp"

#ifdef HYPRE_USE_UMALLOC
#undef HYPRE_USE_UMALLOC
#endif

/******************************************************************************
 *
 * Helper routines
 *
 *****************************************************************************/

/*--------------------------------------------------------------------------
 * hypre_OutOfMemory
 *--------------------------------------------------------------------------*/
static inline void
hypre_OutOfMemory(size_t size)
{
   hypre_error_w_msg(HYPRE_ERROR_MEMORY, "Out of memory trying to allocate too many bytes\n");
   hypre_assert(0);
   fflush(stdout);
}

static inline void
hypre_WrongMemoryLocation()
{
   hypre_error_w_msg(HYPRE_ERROR_MEMORY, "Wrong HYPRE MEMORY location: Only HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE and HYPRE_MEMORY_HOST_PINNED are supported!\n");
   hypre_assert(0);
   fflush(stdout);
}

/*==========================================================================
 * Physical memory location (hypre_MemoryLocation) interface
 *==========================================================================*/

/*--------------------------------------------------------------------------
 * Memset
 *--------------------------------------------------------------------------*/
static inline void
hypre_HostMemset(void *ptr, HYPRE_Int value, size_t num)
{
   memset(ptr, value, num);
}

static inline void
hypre_DeviceMemset(void *ptr, HYPRE_Int value, size_t num)
{
#if defined(HYPRE_USING_DEVICE_OPENMP)
#if defined(HYPRE_DEVICE_OPENMP_ALLOC)
   HYPRE_CUDA_CALL( cudaMemset(ptr, value, num) );
#else
   memset(ptr, value, num);
   HYPRE_OMPOffload(hypre__offload_device_num, ptr, num, "update", "to");
#endif
   HYPRE_CUDA_CALL( cudaDeviceSynchronize() );
#endif

#if defined(HYPRE_USING_CUDA)
   HYPRE_CUDA_CALL( cudaMemset(ptr, value, num) );
#endif

#if defined(HYPRE_USING_HIP)
   HYPRE_HIP_CALL( hipMemset(ptr, value, num) );
#endif
}

static inline void
hypre_UnifiedMemset(void *ptr, HYPRE_Int value, size_t num)
{
#if defined(HYPRE_USING_DEVICE_OPENMP)
   HYPRE_CUDA_CALL( cudaMemset(ptr, value, num) );
   HYPRE_CUDA_CALL( cudaDeviceSynchronize() );
#endif

#if defined(HYPRE_USING_CUDA)
   HYPRE_CUDA_CALL( cudaMemset(ptr, value, num) );
#endif

#if defined(HYPRE_USING_HIP)
   HYPRE_HIP_CALL( hipMemset(ptr, value, num) );
#endif
}

/*--------------------------------------------------------------------------
 * Memprefetch
 *--------------------------------------------------------------------------*/
static inline void
hypre_UnifiedMemPrefetch(void *ptr, size_t size, hypre_MemoryLocation location)
{
#if defined(HYPRE_USING_GPU)
#ifdef HYPRE_DEBUG
   hypre_MemoryLocation tmp;
   hypre_GetPointerLocation(ptr, &tmp);
   /* do not use hypre_assert, which allocates and frees memory itself
    * and would therefore create an endless loop here */
   assert(hypre_MEMORY_UNIFIED == tmp);
#endif
#endif

#if defined(HYPRE_USING_DEVICE_OPENMP)
   if (location == hypre_MEMORY_DEVICE)
   {
      HYPRE_CUDA_CALL( cudaMemPrefetchAsync(ptr, size, hypre_HandleCudaDevice(hypre_handle()),
                       hypre_HandleCudaComputeStream(hypre_handle())) );
   }
   else if (location == hypre_MEMORY_HOST)
   {
      HYPRE_CUDA_CALL( cudaMemPrefetchAsync(ptr, size, cudaCpuDeviceId,
                       hypre_HandleCudaComputeStream(hypre_handle())) );
   }
#endif

#if defined(HYPRE_USING_CUDA)
   if (location == hypre_MEMORY_DEVICE)
   {
      HYPRE_CUDA_CALL( cudaMemPrefetchAsync(ptr, size, hypre_HandleCudaDevice(hypre_handle()),
                       hypre_HandleCudaComputeStream(hypre_handle())) );
   }
   else if (location == hypre_MEMORY_HOST)
   {
      HYPRE_CUDA_CALL( cudaMemPrefetchAsync(ptr, size, cudaCpuDeviceId,
                       hypre_HandleCudaComputeStream(hypre_handle())) );
   }
#endif

#if defined(HYPRE_USING_HIP)
   // Not currently implemented for HIP, but leaving a placeholder
   /*
    *if (location == hypre_MEMORY_DEVICE)
    *{
    *   HYPRE_HIP_CALL( hipMemPrefetchAsync(ptr, size, hypre_HandleCudaDevice(hypre_handle()),
    *                    hypre_HandleCudaComputeStream(hypre_handle())) );
    *}
    *else if (location == hypre_MEMORY_HOST)
    *{
    *   HYPRE_HIP_CALL( hipMemPrefetchAsync(ptr, size, hipCpuDeviceId,
    *                    hypre_HandleCudaComputeStream(hypre_handle())) );
    *}
    */
#endif
}

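/* Usage sketch (illustrative only, not part of the library): prefetching a
 * managed allocation to the device before device work, then back to the host,
 * assuming a GPU build with unified memory. `n` and `work` are hypothetical
 * names; hypre_MemPrefetch and hypre_MAlloc are the public wrappers defined
 * later in this file.
 *
 *    size_t  n    = 1000 * sizeof(double);
 *    double *work = (double *) hypre_MAlloc(n, HYPRE_MEMORY_UNIFIED);
 *    hypre_MemPrefetch(work, n, HYPRE_MEMORY_DEVICE); // migrate pages up front
 *    ... run device kernels on work ...
 *    hypre_MemPrefetch(work, n, HYPRE_MEMORY_HOST);   // bring pages back
 *    hypre_Free(work, HYPRE_MEMORY_UNIFIED);
 */
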
/*--------------------------------------------------------------------------
 * Malloc
 *--------------------------------------------------------------------------*/
static inline void *
hypre_HostMalloc(size_t size, HYPRE_Int zeroinit)
{
   void *ptr = NULL;

#if defined(HYPRE_USING_UMPIRE_HOST)
   hypre_umpire_host_pooled_allocate(&ptr, size);
   if (zeroinit)
   {
      memset(ptr, 0, size);
   }
#else
   if (zeroinit)
   {
      ptr = calloc(size, 1);
   }
   else
   {
      ptr = malloc(size);
   }
#endif

   return ptr;
}

static inline void *
hypre_DeviceMalloc(size_t size, HYPRE_Int zeroinit)
{
   void *ptr = NULL;

   if ( hypre_HandleUserDeviceMalloc(hypre_handle()) )
   {
      hypre_HandleUserDeviceMalloc(hypre_handle())(&ptr, size);
   }
   else
   {
#if defined(HYPRE_USING_UMPIRE_DEVICE)
      hypre_umpire_device_pooled_allocate(&ptr, size);
#else

#if defined(HYPRE_USING_DEVICE_OPENMP)
#if defined(HYPRE_DEVICE_OPENMP_ALLOC)
      ptr = omp_target_alloc(size, hypre__offload_device_num);
#else
      /* prepend the allocation size so it can be recovered at free time */
      ptr = malloc(size + sizeof(size_t));
      size_t *sp = (size_t*) ptr;
      sp[0] = size;
      ptr = (void *) (&sp[1]);
      HYPRE_OMPOffload(hypre__offload_device_num, ptr, size, "enter", "alloc");
#endif
#endif

#if defined(HYPRE_USING_CUDA)
#if defined(HYPRE_USING_DEVICE_POOL)
      HYPRE_CUDA_CALL( hypre_CachingMallocDevice(&ptr, size) );
#else
      HYPRE_CUDA_CALL( cudaMalloc(&ptr, size) );
#endif
#endif

#if defined(HYPRE_USING_HIP)
      HYPRE_HIP_CALL( hipMalloc(&ptr, size) );
#endif

#endif /* #if defined(HYPRE_USING_UMPIRE_DEVICE) */
   }

   if (ptr && zeroinit)
   {
      hypre_DeviceMemset(ptr, 0, size);
   }

   return ptr;
}

static inline void *
hypre_UnifiedMalloc(size_t size, HYPRE_Int zeroinit)
{
   void *ptr = NULL;

#if defined(HYPRE_USING_UMPIRE_UM)
   hypre_umpire_um_pooled_allocate(&ptr, size);
#else

#if defined(HYPRE_USING_DEVICE_OPENMP)
   HYPRE_CUDA_CALL( cudaMallocManaged(&ptr, size, cudaMemAttachGlobal) );
#endif

#if defined(HYPRE_USING_CUDA)
#if defined(HYPRE_USING_DEVICE_POOL)
   HYPRE_CUDA_CALL( hypre_CachingMallocManaged(&ptr, size) );
#else
   HYPRE_CUDA_CALL( cudaMallocManaged(&ptr, size, cudaMemAttachGlobal) );
#endif
#endif

#if defined(HYPRE_USING_HIP)
   HYPRE_HIP_CALL( hipMallocManaged(&ptr, size, hipMemAttachGlobal) );
#endif

#endif /* #if defined(HYPRE_USING_UMPIRE_UM) */

   /* prefetch to device */
   if (ptr)
   {
      hypre_UnifiedMemPrefetch(ptr, size, hypre_MEMORY_DEVICE);
   }

   if (ptr && zeroinit)
   {
      hypre_UnifiedMemset(ptr, 0, size);
   }

   return ptr;
}

static inline void *
hypre_HostPinnedMalloc(size_t size, HYPRE_Int zeroinit)
{
   void *ptr = NULL;

#if defined(HYPRE_USING_UMPIRE_PINNED)
   hypre_umpire_pinned_pooled_allocate(&ptr, size);
#else

#if defined(HYPRE_USING_DEVICE_OPENMP)
   HYPRE_CUDA_CALL( cudaMallocHost(&ptr, size) );
#endif

#if defined(HYPRE_USING_CUDA)
   HYPRE_CUDA_CALL( cudaMallocHost(&ptr, size) );
#endif

#if defined(HYPRE_USING_HIP)
   HYPRE_HIP_CALL( hipHostMalloc(&ptr, size) );
#endif

#endif /* #if defined(HYPRE_USING_UMPIRE_PINNED) */

   if (ptr && zeroinit)
   {
      hypre_HostMemset(ptr, 0, size);
   }

   return ptr;
}

static inline void *
hypre_MAlloc_core(size_t size, HYPRE_Int zeroinit, hypre_MemoryLocation location)
{
   if (size == 0)
   {
      return NULL;
   }

   void *ptr = NULL;

   switch (location)
   {
      case hypre_MEMORY_HOST :
         ptr = hypre_HostMalloc(size, zeroinit);
         break;
      case hypre_MEMORY_DEVICE :
         ptr = hypre_DeviceMalloc(size, zeroinit);
         break;
      case hypre_MEMORY_UNIFIED :
         ptr = hypre_UnifiedMalloc(size, zeroinit);
         break;
      case hypre_MEMORY_HOST_PINNED :
         ptr = hypre_HostPinnedMalloc(size, zeroinit);
         break;
      default :
         hypre_WrongMemoryLocation();
   }

   if (!ptr)
   {
      hypre_OutOfMemory(size);
      hypre_MPI_Abort(hypre_MPI_COMM_WORLD, -1);
   }

   return ptr;
}

void *
_hypre_MAlloc(size_t size, hypre_MemoryLocation location)
{
   return hypre_MAlloc_core(size, 0, location);
}

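/* A minimal sketch of the core allocator's dispatch on the physical location
 * (illustrative only; `buf` is a hypothetical name). hypre_MAlloc_core
 * returns NULL for size == 0 and aborts on allocation failure, so callers
 * need no NULL checks of their own:
 *
 *    void *buf = _hypre_MAlloc(256, hypre_MEMORY_DEVICE); // cudaMalloc/hipMalloc/... path
 *    _hypre_Free(buf, hypre_MEMORY_DEVICE);               // must match the alloc location
 */
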
/*--------------------------------------------------------------------------
 * Free
 *--------------------------------------------------------------------------*/
static inline void
hypre_HostFree(void *ptr)
{
#if defined(HYPRE_USING_UMPIRE_HOST)
   hypre_umpire_host_pooled_free(ptr);
#else
   free(ptr);
#endif
}

static inline void
hypre_DeviceFree(void *ptr)
{
   if ( hypre_HandleUserDeviceMfree(hypre_handle()) )
   {
      hypre_HandleUserDeviceMfree(hypre_handle())(ptr);
   }
   else
   {
#if defined(HYPRE_USING_UMPIRE_DEVICE)
      hypre_umpire_device_pooled_free(ptr);
#else

#if defined(HYPRE_USING_DEVICE_OPENMP)
#if defined(HYPRE_DEVICE_OPENMP_ALLOC)
      omp_target_free(ptr, hypre__offload_device_num);
#else
      /* the allocation size was stored just before the pointer in hypre_DeviceMalloc */
      HYPRE_OMPOffload(hypre__offload_device_num, ptr, ((size_t *) ptr)[-1], "exit", "delete");
#endif
#endif

#if defined(HYPRE_USING_CUDA)
#if defined(HYPRE_USING_DEVICE_POOL)
      HYPRE_CUDA_CALL( hypre_CachingFreeDevice(ptr) );
#else
      HYPRE_CUDA_CALL( cudaFree(ptr) );
#endif
#endif

#if defined(HYPRE_USING_HIP)
      HYPRE_HIP_CALL( hipFree(ptr) );
#endif

#endif /* #if defined(HYPRE_USING_UMPIRE_DEVICE) */
   }
}

static inline void
hypre_UnifiedFree(void *ptr)
{
#if defined(HYPRE_USING_UMPIRE_UM)
   hypre_umpire_um_pooled_free(ptr);
#else

#if defined(HYPRE_USING_DEVICE_OPENMP)
   HYPRE_CUDA_CALL( cudaFree(ptr) );
#endif

#if defined(HYPRE_USING_CUDA)
#if defined(HYPRE_USING_DEVICE_POOL)
   HYPRE_CUDA_CALL( hypre_CachingFreeManaged(ptr) );
#else
   HYPRE_CUDA_CALL( cudaFree(ptr) );
#endif
#endif

#if defined(HYPRE_USING_HIP)
   HYPRE_HIP_CALL( hipFree(ptr) );
#endif

#endif /* #if defined(HYPRE_USING_UMPIRE_UM) */
}

static inline void
hypre_HostPinnedFree(void *ptr)
{
#if defined(HYPRE_USING_UMPIRE_PINNED)
   hypre_umpire_pinned_pooled_free(ptr);
#else

#if defined(HYPRE_USING_DEVICE_OPENMP)
   HYPRE_CUDA_CALL( cudaFreeHost(ptr) );
#endif

#if defined(HYPRE_USING_CUDA)
   HYPRE_CUDA_CALL( cudaFreeHost(ptr) );
#endif

#if defined(HYPRE_USING_HIP)
   HYPRE_HIP_CALL( hipHostFree(ptr) );
#endif

#endif /* #if defined(HYPRE_USING_UMPIRE_PINNED) */
}

static inline void
hypre_Free_core(void *ptr, hypre_MemoryLocation location)
{
   if (!ptr)
   {
      return;
   }

#ifdef HYPRE_DEBUG
   hypre_MemoryLocation tmp;
   hypre_GetPointerLocation(ptr, &tmp);
   /* do not use hypre_assert, which allocates and frees memory itself
    * and would therefore create an endless loop here */
   assert(location == tmp);
#endif

   switch (location)
   {
      case hypre_MEMORY_HOST :
         hypre_HostFree(ptr);
         break;
      case hypre_MEMORY_DEVICE :
         hypre_DeviceFree(ptr);
         break;
      case hypre_MEMORY_UNIFIED :
         hypre_UnifiedFree(ptr);
         break;
      case hypre_MEMORY_HOST_PINNED :
         hypre_HostPinnedFree(ptr);
         break;
      default :
         hypre_WrongMemoryLocation();
   }
}

void
_hypre_Free(void *ptr, hypre_MemoryLocation location)
{
   hypre_Free_core(ptr, location);
}

/*--------------------------------------------------------------------------
 * Memcpy
 *--------------------------------------------------------------------------*/
static inline void
hypre_Memcpy_core(void *dst, void *src, size_t size, hypre_MemoryLocation loc_dst, hypre_MemoryLocation loc_src)
{
   if (dst == NULL || src == NULL)
   {
      if (size)
      {
         hypre_printf("hypre_Memcpy warning: copy %zu bytes from %p to %p!\n", size, src, dst);
         hypre_assert(0);
      }

      return;
   }

   if (dst == src)
   {
      return;
   }

   /* In total there are 4 x 4 = 16 (loc_dst, loc_src) cases */

   /* 4: Host   <-- Host, Host   <-- Pinned,
    *    Pinned <-- Host, Pinned <-- Pinned.
    */
   if ( loc_dst != hypre_MEMORY_DEVICE && loc_dst != hypre_MEMORY_UNIFIED &&
        loc_src != hypre_MEMORY_DEVICE && loc_src != hypre_MEMORY_UNIFIED )
   {
      memcpy(dst, src, size);
      return;
   }

   /* 3: UVM <-- Device, Device <-- UVM, UVM <-- UVM */
   if ( (loc_dst == hypre_MEMORY_UNIFIED && loc_src == hypre_MEMORY_DEVICE)  ||
        (loc_dst == hypre_MEMORY_DEVICE  && loc_src == hypre_MEMORY_UNIFIED) ||
        (loc_dst == hypre_MEMORY_UNIFIED && loc_src == hypre_MEMORY_UNIFIED) )
   {
#if defined(HYPRE_USING_DEVICE_OPENMP)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyDeviceToDevice) );
#endif

#if defined(HYPRE_USING_CUDA)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyDeviceToDevice) );
#endif

#if defined(HYPRE_USING_HIP)
      HYPRE_HIP_CALL( hipMemcpy(dst, src, size, hipMemcpyDeviceToDevice) );
#endif
      return;
   }

   /* 2: UVM <-- Host, UVM <-- Pinned */
   if (loc_dst == hypre_MEMORY_UNIFIED)
   {
#if defined(HYPRE_USING_DEVICE_OPENMP)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice) );
#endif

#if defined(HYPRE_USING_CUDA)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice) );
#endif

#if defined(HYPRE_USING_HIP)
      HYPRE_HIP_CALL( hipMemcpy(dst, src, size, hipMemcpyHostToDevice) );
#endif
      return;
   }

   /* 2: Host <-- UVM, Pinned <-- UVM */
   if (loc_src == hypre_MEMORY_UNIFIED)
   {
#if defined(HYPRE_USING_DEVICE_OPENMP)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost) );
#endif

#if defined(HYPRE_USING_CUDA)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost) );
#endif

#if defined(HYPRE_USING_HIP)
      HYPRE_HIP_CALL( hipMemcpy(dst, src, size, hipMemcpyDeviceToHost) );
#endif
      return;
   }

   /* 2: Device <-- Host, Device <-- Pinned */
   if ( loc_dst == hypre_MEMORY_DEVICE && (loc_src == hypre_MEMORY_HOST || loc_src == hypre_MEMORY_HOST_PINNED) )
   {
#if defined(HYPRE_USING_DEVICE_OPENMP)
#if defined(HYPRE_DEVICE_OPENMP_ALLOC)
      omp_target_memcpy(dst, src, size, 0, 0, hypre__offload_device_num, hypre__offload_host_num);
#else
      memcpy(dst, src, size);
      HYPRE_OMPOffload(hypre__offload_device_num, dst, size, "update", "to");
#endif
#endif

#if defined(HYPRE_USING_CUDA)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice) );
#endif

#if defined(HYPRE_USING_HIP)
      HYPRE_HIP_CALL( hipMemcpy(dst, src, size, hipMemcpyHostToDevice) );
#endif
      return;
   }

   /* 2: Host <-- Device, Pinned <-- Device */
   if ( (loc_dst == hypre_MEMORY_HOST || loc_dst == hypre_MEMORY_HOST_PINNED) && loc_src == hypre_MEMORY_DEVICE )
   {
#if defined(HYPRE_USING_DEVICE_OPENMP)
#if defined(HYPRE_DEVICE_OPENMP_ALLOC)
      omp_target_memcpy(dst, src, size, 0, 0, hypre__offload_host_num, hypre__offload_device_num);
#else
      HYPRE_OMPOffload(hypre__offload_device_num, src, size, "update", "from");
      memcpy(dst, src, size);
#endif
#endif

#if defined(HYPRE_USING_CUDA)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost) );
#endif

#if defined(HYPRE_USING_HIP)
      HYPRE_HIP_CALL( hipMemcpy(dst, src, size, hipMemcpyDeviceToHost) );
#endif
      return;
   }

   /* 1: Device <-- Device */
   if (loc_dst == hypre_MEMORY_DEVICE && loc_src == hypre_MEMORY_DEVICE)
   {
#if defined(HYPRE_USING_DEVICE_OPENMP)
#if defined(HYPRE_DEVICE_OPENMP_ALLOC)
      omp_target_memcpy(dst, src, size, 0, 0, hypre__offload_device_num, hypre__offload_device_num);
#else
      HYPRE_OMPOffload(hypre__offload_device_num, src, size, "update", "from");
      memcpy(dst, src, size);
      HYPRE_OMPOffload(hypre__offload_device_num, dst, size, "update", "to");
#endif
#endif

#if defined(HYPRE_USING_CUDA)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyDeviceToDevice) );
#endif

#if defined(HYPRE_USING_HIP)
      HYPRE_HIP_CALL( hipMemcpy(dst, src, size, hipMemcpyDeviceToDevice) );
#endif
      return;
   }

   hypre_WrongMemoryLocation();
}

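/* Dispatch sketch for the cases above (illustrative only): a copy from a
 * plain host buffer into a device buffer falls into the "Device <-- Host"
 * branch and becomes cudaMemcpy(..., cudaMemcpyHostToDevice) on CUDA builds
 * (hipMemcpy on HIP). `h_x` and `d_x` are hypothetical names.
 *
 *    double  h_x[16] = {0};
 *    double *d_x = (double *) _hypre_MAlloc(sizeof(h_x), hypre_MEMORY_DEVICE);
 *    hypre_Memcpy_core(d_x, h_x, sizeof(h_x), hypre_MEMORY_DEVICE, hypre_MEMORY_HOST);
 *    _hypre_Free(d_x, hypre_MEMORY_DEVICE);
 */
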
/*--------------------------------------------------------------------------*
 * ExecPolicy
 *--------------------------------------------------------------------------*/
static inline HYPRE_ExecutionPolicy
hypre_GetExecPolicy1_core(hypre_MemoryLocation location)
{
   HYPRE_ExecutionPolicy exec = HYPRE_EXEC_UNDEFINED;

   switch (location)
   {
      case hypre_MEMORY_HOST :
      case hypre_MEMORY_HOST_PINNED :
         exec = HYPRE_EXEC_HOST;
         break;
      case hypre_MEMORY_DEVICE :
         exec = HYPRE_EXEC_DEVICE;
         break;
      case hypre_MEMORY_UNIFIED :
#if defined(HYPRE_USING_GPU)
         exec = hypre_HandleDefaultExecPolicy(hypre_handle());
#endif
         break;
      default :
         hypre_WrongMemoryLocation();
   }

   hypre_assert(exec != HYPRE_EXEC_UNDEFINED);

   return exec;
}

/* for binary operation */
static inline HYPRE_ExecutionPolicy
hypre_GetExecPolicy2_core(hypre_MemoryLocation location1,
                          hypre_MemoryLocation location2)
{
   HYPRE_ExecutionPolicy exec = HYPRE_EXEC_UNDEFINED;

   /* HOST_PINNED has the same exec policy as HOST */
   if (location1 == hypre_MEMORY_HOST_PINNED)
   {
      location1 = hypre_MEMORY_HOST;
   }

   if (location2 == hypre_MEMORY_HOST_PINNED)
   {
      location2 = hypre_MEMORY_HOST;
   }

   /* no policy for these combinations */
   if ( (location1 == hypre_MEMORY_HOST && location2 == hypre_MEMORY_DEVICE) ||
        (location2 == hypre_MEMORY_HOST && location1 == hypre_MEMORY_DEVICE) )
   {
      exec = HYPRE_EXEC_UNDEFINED;
   }

   /* this should never happen */
   if ( (location1 == hypre_MEMORY_UNIFIED && location2 == hypre_MEMORY_DEVICE) ||
        (location2 == hypre_MEMORY_UNIFIED && location1 == hypre_MEMORY_DEVICE) )
   {
      exec = HYPRE_EXEC_UNDEFINED;
   }

   if (location1 == hypre_MEMORY_UNIFIED && location2 == hypre_MEMORY_UNIFIED)
   {
#if defined(HYPRE_USING_GPU)
      exec = hypre_HandleDefaultExecPolicy(hypre_handle());
#endif
   }

   if (location1 == hypre_MEMORY_HOST || location2 == hypre_MEMORY_HOST)
   {
      exec = HYPRE_EXEC_HOST;
   }

   if (location1 == hypre_MEMORY_DEVICE || location2 == hypre_MEMORY_DEVICE)
   {
      exec = HYPRE_EXEC_DEVICE;
   }

   hypre_assert(exec != HYPRE_EXEC_UNDEFINED);

   return exec;
}

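/* Resolution sketch (illustrative only): with the rules above, PINNED is
 * first folded into HOST, and the later checks take precedence in order.
 * Two examples traced through the code:
 *
 *    hypre_GetExecPolicy2_core(hypre_MEMORY_HOST_PINNED, hypre_MEMORY_HOST)
 *       --> HYPRE_EXEC_HOST                 (PINNED is treated as HOST)
 *    hypre_GetExecPolicy2_core(hypre_MEMORY_UNIFIED, hypre_MEMORY_UNIFIED)
 *       --> hypre_HandleDefaultExecPolicy(hypre_handle()) on GPU builds
 */
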
/*==========================================================================
 * Conceptual memory location (HYPRE_MemoryLocation) interface
 *==========================================================================*/

/*--------------------------------------------------------------------------
 * hypre_Memset
 * "Sets the first num bytes of the block of memory pointed by ptr to the specified value
 * (*** value is interpreted as an unsigned char ***)"
 * http://www.cplusplus.com/reference/cstring/memset/
 *--------------------------------------------------------------------------*/
void *
hypre_Memset(void *ptr, HYPRE_Int value, size_t num, HYPRE_MemoryLocation location)
{
   if (num == 0)
   {
      return ptr;
   }

   if (ptr == NULL)
   {
      if (num)
      {
         hypre_printf("hypre_Memset warning: set values for %zu bytes at %p!\n", num, ptr);
      }
      return ptr;
   }

   switch (hypre_GetActualMemLocation(location))
   {
      case hypre_MEMORY_HOST :
      case hypre_MEMORY_HOST_PINNED :
         hypre_HostMemset(ptr, value, num);
         break;
      case hypre_MEMORY_DEVICE :
         hypre_DeviceMemset(ptr, value, num);
         break;
      case hypre_MEMORY_UNIFIED :
         hypre_UnifiedMemset(ptr, value, num);
         break;
      default :
         hypre_WrongMemoryLocation();
   }

   return ptr;
}

/*--------------------------------------------------------------------------
 * Memprefetch
 *--------------------------------------------------------------------------*/
void
hypre_MemPrefetch(void *ptr, size_t size, HYPRE_MemoryLocation location)
{
   hypre_UnifiedMemPrefetch( ptr, size, hypre_GetActualMemLocation(location) );
}

/*--------------------------------------------------------------------------*
 * hypre_MAlloc, hypre_CAlloc
 *--------------------------------------------------------------------------*/

void *
hypre_MAlloc(size_t size, HYPRE_MemoryLocation location)
{
   return hypre_MAlloc_core(size, 0, hypre_GetActualMemLocation(location));
}

void *
hypre_CAlloc(size_t count, size_t elt_size, HYPRE_MemoryLocation location)
{
   return hypre_MAlloc_core(count * elt_size, 1, hypre_GetActualMemLocation(location));
}

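/* Typical caller-facing sketch (illustrative only; `n` and `v` are
 * hypothetical names). hypre_CAlloc computes count * elt_size itself, so the
 * caller passes an element count, not a byte count, and gets back
 * zero-initialized memory in the requested location:
 *
 *    HYPRE_Int   n = 100;
 *    HYPRE_Real *v = (HYPRE_Real *) hypre_CAlloc((size_t) n, sizeof(HYPRE_Real),
 *                                                HYPRE_MEMORY_DEVICE);
 *    hypre_Free(v, HYPRE_MEMORY_DEVICE);
 */
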
/*--------------------------------------------------------------------------
 * hypre_Free
 *--------------------------------------------------------------------------*/

void
hypre_Free(void *ptr, HYPRE_MemoryLocation location)
{
   hypre_Free_core(ptr, hypre_GetActualMemLocation(location));
}

/*--------------------------------------------------------------------------
 * hypre_Memcpy
 *--------------------------------------------------------------------------*/

void
hypre_Memcpy(void *dst, void *src, size_t size, HYPRE_MemoryLocation loc_dst, HYPRE_MemoryLocation loc_src)
{
   hypre_Memcpy_core( dst, src, size, hypre_GetActualMemLocation(loc_dst), hypre_GetActualMemLocation(loc_src) );
}

/*--------------------------------------------------------------------------
 * hypre_ReAlloc
 *--------------------------------------------------------------------------*/
void *
hypre_ReAlloc(void *ptr, size_t size, HYPRE_MemoryLocation location)
{
   if (size == 0)
   {
      hypre_Free(ptr, location);
      return NULL;
   }

   if (ptr == NULL)
   {
      return hypre_MAlloc(size, location);
   }

   if (hypre_GetActualMemLocation(location) != hypre_MEMORY_HOST)
   {
      hypre_printf("hypre_TReAlloc only works with HYPRE_MEMORY_HOST; use hypre_TReAlloc_v2 instead!\n");
      hypre_assert(0);
      hypre_MPI_Abort(hypre_MPI_COMM_WORLD, -1);
      return NULL;
   }

#if defined(HYPRE_USING_UMPIRE_HOST)
   ptr = hypre_umpire_host_pooled_realloc(ptr, size);
#else
   ptr = realloc(ptr, size);
#endif

   if (!ptr)
   {
      hypre_OutOfMemory(size);
   }

   return ptr;
}

void *
hypre_ReAlloc_v2(void *ptr, size_t old_size, size_t new_size, HYPRE_MemoryLocation location)
{
   if (new_size == 0)
   {
      hypre_Free(ptr, location);
      return NULL;
   }

   if (ptr == NULL)
   {
      return hypre_MAlloc(new_size, location);
   }

   void *new_ptr = hypre_MAlloc(new_size, location);
   size_t smaller_size = new_size > old_size ? old_size : new_size;
   hypre_Memcpy(new_ptr, ptr, smaller_size, location, location);
   hypre_Free(ptr, location);
   ptr = new_ptr;

   if (!ptr)
   {
      hypre_OutOfMemory(new_size);
   }

   return ptr;
}

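/* Growth sketch (illustrative only): unlike hypre_ReAlloc, the _v2 variant
 * works for non-host locations because it is told `old_size` and can do an
 * explicit alloc-copy-free. `d_buf` and `old_bytes` are hypothetical names.
 *
 *    d_buf = hypre_ReAlloc_v2(d_buf, old_bytes, 2 * old_bytes, HYPRE_MEMORY_DEVICE);
 */
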
/*--------------------------------------------------------------------------*
 * hypre_GetExecPolicy: return execution policy based on memory locations
 *--------------------------------------------------------------------------*/
/* for unary operation */
HYPRE_ExecutionPolicy
hypre_GetExecPolicy1(HYPRE_MemoryLocation location)
{
   return hypre_GetExecPolicy1_core(hypre_GetActualMemLocation(location));
}

/* for binary operation */
HYPRE_ExecutionPolicy
hypre_GetExecPolicy2(HYPRE_MemoryLocation location1,
                     HYPRE_MemoryLocation location2)
{
   return hypre_GetExecPolicy2_core(hypre_GetActualMemLocation(location1),
                                    hypre_GetActualMemLocation(location2));
}

/*--------------------------------------------------------------------------
 * Query the actual memory location pointed to by ptr
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_GetPointerLocation(const void *ptr, hypre_MemoryLocation *memory_location)
{
   HYPRE_Int ierr = 0;

#if defined(HYPRE_USING_GPU)
   *memory_location = hypre_MEMORY_UNDEFINED;

#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_DEVICE_OPENMP)
   struct cudaPointerAttributes attr;

#if (CUDART_VERSION >= 10000)
#if (CUDART_VERSION >= 11000)
   HYPRE_CUDA_CALL( cudaPointerGetAttributes(&attr, ptr) );
#else
   cudaError_t err = cudaPointerGetAttributes(&attr, ptr);
   if (err != cudaSuccess)
   {
      ierr = 1;
      /* clear the error */
      cudaGetLastError();
   }
#endif
   if (attr.type == cudaMemoryTypeUnregistered)
   {
      *memory_location = hypre_MEMORY_HOST;
   }
   else if (attr.type == cudaMemoryTypeHost)
   {
      *memory_location = hypre_MEMORY_HOST_PINNED;
   }
   else if (attr.type == cudaMemoryTypeDevice)
   {
      *memory_location = hypre_MEMORY_DEVICE;
   }
   else if (attr.type == cudaMemoryTypeManaged)
   {
      *memory_location = hypre_MEMORY_UNIFIED;
   }
#else
   cudaError_t err = cudaPointerGetAttributes(&attr, ptr);
   if (err != cudaSuccess)
   {
      ierr = 1;

      /* clear the error */
      cudaGetLastError();

      if (err == cudaErrorInvalidValue)
      {
         *memory_location = hypre_MEMORY_HOST;
      }
   }
   else if (attr.isManaged)
   {
      *memory_location = hypre_MEMORY_UNIFIED;
   }
   else if (attr.memoryType == cudaMemoryTypeDevice)
   {
      *memory_location = hypre_MEMORY_DEVICE;
   }
   else if (attr.memoryType == cudaMemoryTypeHost)
   {
      *memory_location = hypre_MEMORY_HOST_PINNED;
   }
#endif // CUDART_VERSION >= 10000
#endif // defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_DEVICE_OPENMP)

#if defined(HYPRE_USING_HIP)

   struct hipPointerAttribute_t attr;
   *memory_location = hypre_MEMORY_UNDEFINED;

   hipError_t err = hipPointerGetAttributes(&attr, ptr);
   if (err != hipSuccess)
   {
      ierr = 1;

      /* clear the error */
      hipGetLastError();

      if (err == hipErrorInvalidValue)
      {
         *memory_location = hypre_MEMORY_HOST;
      }
   }
   else if (attr.isManaged)
   {
      *memory_location = hypre_MEMORY_UNIFIED;
   }
   else if (attr.memoryType == hipMemoryTypeDevice)
   {
      *memory_location = hypre_MEMORY_DEVICE;
   }
   else if (attr.memoryType == hipMemoryTypeHost)
   {
      *memory_location = hypre_MEMORY_HOST_PINNED;
   }
#endif // defined(HYPRE_USING_HIP)

#else /* #if defined(HYPRE_USING_GPU) */
   *memory_location = hypre_MEMORY_HOST;
#endif

   return ierr;
}

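/* Query sketch (illustrative only): on GPU builds the location is derived
 * from the runtime's pointer attributes; on CPU-only builds every pointer
 * reports hypre_MEMORY_HOST. Given some pointer `p` (`loc` is a hypothetical
 * name):
 *
 *    hypre_MemoryLocation loc;
 *    HYPRE_Int ierr = hypre_GetPointerLocation(p, &loc);
 *    if (!ierr && loc == hypre_MEMORY_DEVICE) { ... safe to pass to device kernels ... }
 */
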
#ifdef HYPRE_USING_MEMORY_TRACKER

/*--------------------------------------------------------------------------
 * Memory tracker
 * Do not use hypre_T* allocation routines in the following, since we do not
 * want the tracker to track its own allocations
 *--------------------------------------------------------------------------*/
hypre_MemoryTracker *
hypre_MemoryTrackerCreate()
{
   hypre_MemoryTracker *ptr = (hypre_MemoryTracker *) calloc(1, sizeof(hypre_MemoryTracker));
   return ptr;
}

void
hypre_MemoryTrackerDestroy(hypre_MemoryTracker *tracker)
{
   if (tracker)
   {
      free(tracker->data);
      free(tracker);
   }
}

void
hypre_MemoryTrackerInsert(const char           *action,
                          void                 *ptr,
                          size_t                nbytes,
                          hypre_MemoryLocation  memory_location,
                          const char           *filename,
                          const char           *function,
                          HYPRE_Int             line)
{
   if (ptr == NULL)
   {
      return;
   }

   hypre_MemoryTracker *tracker = hypre_memory_tracker();

   if (tracker->alloced_size <= tracker->actual_size)
   {
      tracker->alloced_size = 2 * tracker->alloced_size + 1;
      tracker->data = (hypre_MemoryTrackerEntry *) realloc(tracker->data,
                                                           tracker->alloced_size * sizeof(hypre_MemoryTrackerEntry));
   }

   hypre_assert(tracker->actual_size < tracker->alloced_size);

   hypre_MemoryTrackerEntry *entry = tracker->data + tracker->actual_size;

   sprintf(entry->_action, "%s", action);
   entry->_ptr = ptr;
   entry->_nbytes = nbytes;
   entry->_memory_location = memory_location;
   sprintf(entry->_filename, "%s", filename);
   sprintf(entry->_function, "%s", function);
   entry->_line = line;
   /* -1 is the initial value, meaning "not yet paired with a matching free" */
   entry->_pair = (size_t) -1;

   tracker->actual_size ++;
}


/* Do not use hypre_printf or hypre_fprintf here: they call TAlloc, which
 * would re-enter the tracker and create an endless loop in the scan below */
HYPRE_Int
hypre_PrintMemoryTracker()
{
   HYPRE_Int myid, ierr = 0;
   char filename[256];
   FILE *file;
   size_t i, j;

   hypre_MemoryTracker *tracker = hypre_memory_tracker();

   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid);
   hypre_sprintf(filename, "HypreMemoryTrack.log.%05d", myid);
   if ((file = fopen(filename, "a")) == NULL)
   {
      fprintf(stderr, "Error: can't open output file %s\n", filename);
      return hypre_error_flag;
   }

   fprintf(file, "==== Operations:\n");
   fprintf(file, "     ID        EVENT                 ADDRESS       BYTE         LOCATION                                       FILE(LINE)                                           FUNCTION  |  Memory (   H            P            D            U )\n");

   size_t totl_bytes[hypre_MEMORY_UNIFIED + 1] = {0};
   size_t peak_bytes[hypre_MEMORY_UNIFIED + 1] = {0};
   size_t curr_bytes[hypre_MEMORY_UNIFIED + 1] = {0};

   for (i = 0; i < tracker->actual_size; i++)
   {
      if (strstr(tracker->data[i]._action, "alloc") != NULL)
      {
         totl_bytes[tracker->data[i]._memory_location] += tracker->data[i]._nbytes;
         curr_bytes[tracker->data[i]._memory_location] += tracker->data[i]._nbytes;
         peak_bytes[tracker->data[i]._memory_location] =
            hypre_max( curr_bytes[tracker->data[i]._memory_location],
                       peak_bytes[tracker->data[i]._memory_location] );

         /* for each unpaired "alloc", find its "free" */
         if (tracker->data[i]._pair != (size_t) -1)
         {
            if ( tracker->data[i]._pair >= tracker->actual_size ||
                 tracker->data[tracker->data[i]._pair]._pair != i)
            {
               fprintf(stderr, "hypre memory tracker internal error!\n");
               hypre_MPI_Abort(hypre_MPI_COMM_WORLD, 1);
            }

            continue;
         }

         for (j = i + 1; j < tracker->actual_size; j++)
         {
            if ( strstr(tracker->data[j]._action, "free") != NULL &&
                 tracker->data[j]._pair == (size_t) -1 &&
                 tracker->data[i]._ptr == tracker->data[j]._ptr &&
                 tracker->data[i]._memory_location == tracker->data[j]._memory_location )
            {
               tracker->data[i]._pair = j;
               tracker->data[j]._pair = i;
               tracker->data[j]._nbytes = tracker->data[i]._nbytes;
               break;
            }
         }

         if (tracker->data[i]._pair == (size_t) -1)
         {
            fprintf(stderr, "%6zu: %16p may not have been freed\n", i, tracker->data[i]._ptr );
         }
      }
      else if (strstr(tracker->data[i]._action, "free") != NULL)
      {
         size_t pair = tracker->data[i]._pair;

         if (pair == (size_t) -1)
         {
            fprintf(stderr, "%6zu: unpaired free at %16p\n", i, tracker->data[i]._ptr );
         }
         else
         {
            curr_bytes[tracker->data[i]._memory_location] -= tracker->data[pair]._nbytes;
         }
      }

      if (i < tracker->prev_end)
      {
         continue;
      }

      char memory_location[256];
      char nbytes[32];

      if (tracker->data[i]._memory_location == hypre_MEMORY_HOST)
      {
         sprintf(memory_location, "%s", "HOST");
      }
      else if (tracker->data[i]._memory_location == hypre_MEMORY_HOST_PINNED)
      {
         sprintf(memory_location, "%s", "HOST_PINNED");
      }
      else if (tracker->data[i]._memory_location == hypre_MEMORY_DEVICE)
      {
         sprintf(memory_location, "%s", "DEVICE");
      }
      else if (tracker->data[i]._memory_location == hypre_MEMORY_UNIFIED)
      {
         sprintf(memory_location, "%s", "UNIFIED");
      }
      else
      {
         sprintf(memory_location, "%s", "UNDEFINED");
      }

      if (tracker->data[i]._nbytes != (size_t) -1)
      {
         sprintf(nbytes, "%zu", tracker->data[i]._nbytes);
      }
      else
      {
         sprintf(nbytes, "%s", "");
      }

      fprintf(file, " %6zu %12s        %16p %10s %16s %40s (%5d) %50s  |  %12zu %12zu %12zu %12zu\n",
              i,
              tracker->data[i]._action,
              tracker->data[i]._ptr,
              nbytes,
              memory_location,
              tracker->data[i]._filename,
              tracker->data[i]._line,
              tracker->data[i]._function,
              curr_bytes[hypre_MEMORY_HOST],
              curr_bytes[hypre_MEMORY_HOST_PINNED],
              curr_bytes[hypre_MEMORY_DEVICE],
              curr_bytes[hypre_MEMORY_UNIFIED]
              );
   }

   fprintf(file, "\n==== Total allocated (byte):\n");
   fprintf(file, "HOST: %16zu, HOST_PINNED %16zu, DEVICE %16zu, UNIFIED %16zu\n",
           totl_bytes[hypre_MEMORY_HOST],
           totl_bytes[hypre_MEMORY_HOST_PINNED],
           totl_bytes[hypre_MEMORY_DEVICE],
           totl_bytes[hypre_MEMORY_UNIFIED]);

   fprintf(file, "\n==== Peak (byte):\n");
   fprintf(file, "HOST: %16zu, HOST_PINNED %16zu, DEVICE %16zu, UNIFIED %16zu\n",
           peak_bytes[hypre_MEMORY_HOST],
           peak_bytes[hypre_MEMORY_HOST_PINNED],
           peak_bytes[hypre_MEMORY_DEVICE],
           peak_bytes[hypre_MEMORY_UNIFIED]);

   fprintf(file, "\n==== Reachable (byte):\n");
   fprintf(file, "HOST: %16zu, HOST_PINNED %16zu, DEVICE %16zu, UNIFIED %16zu\n",
           curr_bytes[hypre_MEMORY_HOST],
           curr_bytes[hypre_MEMORY_HOST_PINNED],
           curr_bytes[hypre_MEMORY_DEVICE],
           curr_bytes[hypre_MEMORY_UNIFIED]);

   fprintf(file, "\n==== Warnings:\n");
   for (i = 0; i < tracker->actual_size; i++)
   {
      if (tracker->data[i]._pair == (size_t) -1)
      {
         if (strstr(tracker->data[i]._action, "alloc") != NULL)
         {
            fprintf(file, "%6zu: %p may not have been freed\n", i, tracker->data[i]._ptr );
         }
         else if (strstr(tracker->data[i]._action, "free") != NULL)
         {
            fprintf(file, "%6zu: unpaired free at %16p\n", i, tracker->data[i]._ptr );
         }
      }
   }

   fclose(file);

   tracker->prev_end = tracker->actual_size;

   return ierr;
}
#endif

/*--------------------------------------------------------------------------*
 * Memory Pool
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_SetCubMemPoolSize(hypre_uint cub_bin_growth,
                        hypre_uint cub_min_bin,
                        hypre_uint cub_max_bin,
                        size_t     cub_max_cached_bytes)
{
#if defined(HYPRE_USING_CUDA)
#ifdef HYPRE_USING_DEVICE_POOL
   hypre_HandleCubBinGrowth(hypre_handle())      = cub_bin_growth;
   hypre_HandleCubMinBin(hypre_handle())         = cub_min_bin;
   hypre_HandleCubMaxBin(hypre_handle())         = cub_max_bin;
   hypre_HandleCubMaxCachedBytes(hypre_handle()) = cub_max_cached_bytes;

   //TODO XXX RL: cub_min_bin, cub_max_bin are not (re)set
   if (hypre_HandleCubDevAllocator(hypre_handle()))
   {
      hypre_HandleCubDevAllocator(hypre_handle()) -> SetMaxCachedBytes(cub_max_cached_bytes);
   }

   if (hypre_HandleCubUvmAllocator(hypre_handle()))
   {
      hypre_HandleCubUvmAllocator(hypre_handle()) -> SetMaxCachedBytes(cub_max_cached_bytes);
   }
#endif
#endif

   return hypre_error_flag;
}

HYPRE_Int
HYPRE_SetGPUMemoryPoolSize(HYPRE_Int bin_growth,
                           HYPRE_Int min_bin,
                           HYPRE_Int max_bin,
                           size_t    max_cached_bytes)
{
   return hypre_SetCubMemPoolSize(bin_growth, min_bin, max_bin, max_cached_bytes);
}

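/* Configuration sketch (illustrative only): the public wrapper forwards to
 * hypre_SetCubMemPoolSize, which only takes effect for CUDA builds configured
 * with the device pool. The values below are arbitrary examples, not
 * recommendations: geometric bin growth of 8, bins 1..9, and a 2 GiB cap on
 * cached blocks.
 *
 *    HYPRE_SetGPUMemoryPoolSize(8, 1, 9, (size_t) 2 << 30);
 */
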
#ifdef HYPRE_USING_DEVICE_POOL
cudaError_t
hypre_CachingMallocDevice(void **ptr, size_t nbytes)
{
   if (!hypre_HandleCubDevAllocator(hypre_handle()))
   {
      hypre_HandleCubDevAllocator(hypre_handle()) =
         hypre_CudaDataCubCachingAllocatorCreate( hypre_HandleCubBinGrowth(hypre_handle()),
                                                  hypre_HandleCubMinBin(hypre_handle()),
                                                  hypre_HandleCubMaxBin(hypre_handle()),
                                                  hypre_HandleCubMaxCachedBytes(hypre_handle()),
                                                  false,
                                                  false,
                                                  false );
   }

   return hypre_HandleCubDevAllocator(hypre_handle()) -> DeviceAllocate(ptr, nbytes);
}

cudaError_t
hypre_CachingFreeDevice(void *ptr)
{
   return hypre_HandleCubDevAllocator(hypre_handle()) -> DeviceFree(ptr);
}

cudaError_t
hypre_CachingMallocManaged(void **ptr, size_t nbytes)
{
   if (!hypre_HandleCubUvmAllocator(hypre_handle()))
   {
      hypre_HandleCubUvmAllocator(hypre_handle()) =
         hypre_CudaDataCubCachingAllocatorCreate( hypre_HandleCubBinGrowth(hypre_handle()),
                                                  hypre_HandleCubMinBin(hypre_handle()),
                                                  hypre_HandleCubMaxBin(hypre_handle()),
                                                  hypre_HandleCubMaxCachedBytes(hypre_handle()),
                                                  false,
                                                  false,
                                                  true );
   }

   return hypre_HandleCubUvmAllocator(hypre_handle()) -> DeviceAllocate(ptr, nbytes);
}

cudaError_t
hypre_CachingFreeManaged(void *ptr)
{
   return hypre_HandleCubUvmAllocator(hypre_handle()) -> DeviceFree(ptr);
}

hypre_cub_CachingDeviceAllocator *
hypre_CudaDataCubCachingAllocatorCreate(hypre_uint bin_growth,
                                        hypre_uint min_bin,
                                        hypre_uint max_bin,
                                        size_t     max_cached_bytes,
                                        bool       skip_cleanup,
                                        bool       debug,
                                        bool       use_managed_memory)
{
   hypre_cub_CachingDeviceAllocator *allocator =
      new hypre_cub_CachingDeviceAllocator( bin_growth,
                                            min_bin,
                                            max_bin,
                                            max_cached_bytes,
                                            skip_cleanup,
                                            debug,
                                            use_managed_memory );

   return allocator;
}

void
hypre_CudaDataCubCachingAllocatorDestroy(hypre_CudaData *data)
{
   delete hypre_CudaDataCubDevAllocator(data);
   delete hypre_CudaDataCubUvmAllocator(data);
}

#endif // #ifdef HYPRE_USING_DEVICE_POOL

#if defined(HYPRE_USING_UMPIRE_HOST)
HYPRE_Int
hypre_umpire_host_pooled_allocate(void **ptr, size_t nbytes)
{
   hypre_Handle *handle = hypre_handle();
   const char *resource_name = "HOST";
   const char *pool_name = hypre_HandleUmpireHostPoolName(handle);

   umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);
   umpire_allocator pooled_allocator;

   if ( umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name) )
   {
      umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
   }
   else
   {
      umpire_allocator allocator;
      umpire_resourcemanager_get_allocator_by_name(rm_ptr, resource_name, &allocator);
      umpire_resourcemanager_make_allocator_pool(rm_ptr, pool_name, allocator,
                                                 hypre_HandleUmpireHostPoolSize(handle),
                                                 hypre_HandleUmpireBlockSize(handle), &pooled_allocator);
      hypre_HandleOwnUmpireHostPool(handle) = 1;
   }

   *ptr = umpire_allocator_allocate(&pooled_allocator, nbytes);

   return hypre_error_flag;
}

HYPRE_Int
hypre_umpire_host_pooled_free(void *ptr)
{
   hypre_Handle *handle = hypre_handle();
   const char *pool_name = hypre_HandleUmpireHostPoolName(handle);
   umpire_allocator pooled_allocator;

   umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);

   hypre_assert(umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name));

   umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
   umpire_allocator_deallocate(&pooled_allocator, ptr);

   return hypre_error_flag;
}

void *
hypre_umpire_host_pooled_realloc(void *ptr, size_t size)
{
   hypre_Handle *handle = hypre_handle();
   const char *pool_name = hypre_HandleUmpireHostPoolName(handle);
   umpire_allocator pooled_allocator;

   umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);

   hypre_assert(umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name));

   umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
   ptr = umpire_resourcemanager_reallocate_with_allocator(rm_ptr, ptr, size, pooled_allocator);

   return ptr;
}
#endif

#if defined(HYPRE_USING_UMPIRE_DEVICE)
HYPRE_Int
hypre_umpire_device_pooled_allocate(void **ptr, size_t nbytes)
{
   hypre_Handle *handle = hypre_handle();
   const hypre_int device_id = hypre_HandleCudaDevice(handle);
   char resource_name[16];
   const char *pool_name = hypre_HandleUmpireDevicePoolName(handle);

   hypre_sprintf(resource_name, "%s::%d", "DEVICE", device_id);

   umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);
   umpire_allocator pooled_allocator;

   if ( umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name) )
   {
      umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
   }
   else
   {
      umpire_allocator allocator;
      umpire_resourcemanager_get_allocator_by_name(rm_ptr, resource_name, &allocator);
      umpire_resourcemanager_make_allocator_pool(rm_ptr, pool_name, allocator,
                                                 hypre_HandleUmpireDevicePoolSize(handle),
                                                 hypre_HandleUmpireBlockSize(handle), &pooled_allocator);

      hypre_HandleOwnUmpireDevicePool(handle) = 1;
   }

   *ptr = umpire_allocator_allocate(&pooled_allocator, nbytes);

   return hypre_error_flag;
}

HYPRE_Int
hypre_umpire_device_pooled_free(void *ptr)
{
   hypre_Handle *handle = hypre_handle();
   const char *pool_name = hypre_HandleUmpireDevicePoolName(handle);
   umpire_allocator pooled_allocator;

   umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);

   hypre_assert(umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name));

   umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
   umpire_allocator_deallocate(&pooled_allocator, ptr);

   return hypre_error_flag;
}
#endif

#if defined(HYPRE_USING_UMPIRE_UM)
HYPRE_Int
hypre_umpire_um_pooled_allocate(void **ptr, size_t nbytes)
{
   hypre_Handle *handle = hypre_handle();
   const char *resource_name = "UM";
   const char *pool_name = hypre_HandleUmpireUMPoolName(handle);

   umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);
   umpire_allocator pooled_allocator;

   if ( umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name) )
   {
      umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
   }
   else
   {
      umpire_allocator allocator;
      umpire_resourcemanager_get_allocator_by_name(rm_ptr, resource_name, &allocator);
      umpire_resourcemanager_make_allocator_pool(rm_ptr, pool_name, allocator,
                                                 hypre_HandleUmpireUMPoolSize(handle),
                                                 hypre_HandleUmpireBlockSize(handle), &pooled_allocator);

      hypre_HandleOwnUmpireUMPool(handle) = 1;
   }

   *ptr = umpire_allocator_allocate(&pooled_allocator, nbytes);

   return hypre_error_flag;
}

HYPRE_Int
hypre_umpire_um_pooled_free(void *ptr)
{
   hypre_Handle *handle = hypre_handle();
   const char *pool_name = hypre_HandleUmpireUMPoolName(handle);
   umpire_allocator pooled_allocator;

   umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);

   hypre_assert(umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name));

   umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
   umpire_allocator_deallocate(&pooled_allocator, ptr);

   return hypre_error_flag;
}
#endif

#if defined(HYPRE_USING_UMPIRE_PINNED)
HYPRE_Int
hypre_umpire_pinned_pooled_allocate(void **ptr, size_t nbytes)
{
   hypre_Handle *handle = hypre_handle();
   const char *resource_name = "PINNED";
   const char *pool_name = hypre_HandleUmpirePinnedPoolName(handle);

   umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);
   umpire_allocator pooled_allocator;

   if ( umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name) )
   {
      umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
   }
   else
   {
      umpire_allocator allocator;
      umpire_resourcemanager_get_allocator_by_name(rm_ptr, resource_name, &allocator);
      umpire_resourcemanager_make_allocator_pool(rm_ptr, pool_name, allocator,
                                                 hypre_HandleUmpirePinnedPoolSize(handle),
                                                 hypre_HandleUmpireBlockSize(handle), &pooled_allocator);

      hypre_HandleOwnUmpirePinnedPool(handle) = 1;
   }

   *ptr = umpire_allocator_allocate(&pooled_allocator, nbytes);

   return hypre_error_flag;
}

HYPRE_Int
hypre_umpire_pinned_pooled_free(void *ptr)
{
   hypre_Handle *handle = hypre_handle();
   const char *pool_name = hypre_HandleUmpirePinnedPoolName(handle);
   umpire_allocator pooled_allocator;

   umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);

   hypre_assert(umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name));

   umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
   umpire_allocator_deallocate(&pooled_allocator, ptr);

   return hypre_error_flag;
}
#endif