1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 /******************************************************************************
9 *
10 * Memory management utilities
11 *
12 *****************************************************************************/
13
14 #include "_hypre_utilities.h"
15 #include "_hypre_utilities.hpp"
16
17 #ifdef HYPRE_USE_UMALLOC
18 #undef HYPRE_USE_UMALLOC
19 #endif
20
21 /******************************************************************************
22 *
23 * Helper routines
24 *
25 *****************************************************************************/
26
27 /*--------------------------------------------------------------------------
28 * hypre_OutOfMemory
29 *--------------------------------------------------------------------------*/
static inline void
hypre_OutOfMemory(size_t size)
{
   /* Report an allocation failure as a hypre memory error.
    * NOTE: the requested 'size' is currently not included in the message. */
   hypre_error_w_msg(HYPRE_ERROR_MEMORY,"Out of memory trying to allocate too many bytes\n");
   hypre_assert(0);
   /* flush pending output so the error context is visible before a potential abort */
   fflush(stdout);
}
37
/* Report use of an unsupported/unknown memory location value. */
static inline void
hypre_WrongMemoryLocation()
{
   hypre_error_w_msg(HYPRE_ERROR_MEMORY, "Wrong HYPRE MEMORY location: Only HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE and HYPRE_MEMORY_HOST_PINNED are supported!\n");
   hypre_assert(0);
   fflush(stdout);
}
45
46 /*==========================================================================
47 * Physical memory location (hypre_MemoryLocation) interface
48 *==========================================================================*/
49
50 /*--------------------------------------------------------------------------
51 * Memset
52 *--------------------------------------------------------------------------*/
/* Set 'num' bytes at host pointer 'ptr' to 'value' (value is truncated to
 * unsigned char, as with standard memset). */
static inline void
hypre_HostMemset(void *ptr, HYPRE_Int value, size_t num)
{
   memset(ptr, value, num);
}
58
/* Set 'num' bytes of device memory at 'ptr' to 'value'.
 * The implementation is selected at compile time by the configured backend
 * (OpenMP offload, CUDA, or HIP). */
static inline void
hypre_DeviceMemset(void *ptr, HYPRE_Int value, size_t num)
{
#if defined(HYPRE_USING_DEVICE_OPENMP)
#if defined(HYPRE_DEVICE_OPENMP_ALLOC)
   HYPRE_CUDA_CALL( cudaMemset(ptr, value, num) );
#else
   /* mapped-data mode: set the host copy, then push it to the device */
   memset(ptr, value, num);
   HYPRE_OMPOffload(hypre__offload_device_num, ptr, num, "update", "to");
#endif
   HYPRE_CUDA_CALL( cudaDeviceSynchronize() );
#endif

#if defined(HYPRE_USING_CUDA)
   HYPRE_CUDA_CALL( cudaMemset(ptr, value, num) );
#endif

#if defined(HYPRE_USING_HIP)
   HYPRE_HIP_CALL( hipMemset(ptr, value, num) );
#endif
}
80
/* Set 'num' bytes of unified (managed) memory at 'ptr' to 'value'. */
static inline void
hypre_UnifiedMemset(void *ptr, HYPRE_Int value, size_t num)
{
#if defined(HYPRE_USING_DEVICE_OPENMP)
   HYPRE_CUDA_CALL( cudaMemset(ptr, value, num) );
   HYPRE_CUDA_CALL( cudaDeviceSynchronize() );
#endif

#if defined(HYPRE_USING_CUDA)
   HYPRE_CUDA_CALL( cudaMemset(ptr, value, num) );
#endif

#if defined(HYPRE_USING_HIP)
   HYPRE_HIP_CALL( hipMemset(ptr, value, num) );
#endif
}
97
98 /*--------------------------------------------------------------------------
99 * Memprefetch
100 *--------------------------------------------------------------------------*/
/* Prefetch 'size' bytes of unified (managed) memory at 'ptr' toward the
 * given physical location (device or host). The prefetch is asynchronous on
 * hypre's compute stream. 'ptr' must point to unified memory (checked under
 * HYPRE_DEBUG). No-op for HIP builds (not implemented yet). */
static inline void
hypre_UnifiedMemPrefetch(void *ptr, size_t size, hypre_MemoryLocation location)
{
#if defined(HYPRE_USING_GPU)
#ifdef HYPRE_DEBUG
   hypre_MemoryLocation tmp;
   hypre_GetPointerLocation(ptr, &tmp);
   /* do not use hypre_assert, which has alloc and free;
    * will create an endless loop otherwise */
   assert(hypre_MEMORY_UNIFIED == tmp);
#endif
#endif

#if defined(HYPRE_USING_DEVICE_OPENMP)
   if (location == hypre_MEMORY_DEVICE)
   {
      HYPRE_CUDA_CALL( cudaMemPrefetchAsync(ptr, size, hypre_HandleCudaDevice(hypre_handle()),
                                            hypre_HandleCudaComputeStream(hypre_handle())) );
   }
   else if (location == hypre_MEMORY_HOST)
   {
      HYPRE_CUDA_CALL( cudaMemPrefetchAsync(ptr, size, cudaCpuDeviceId,
                                            hypre_HandleCudaComputeStream(hypre_handle())) );
   }
#endif

#if defined(HYPRE_USING_CUDA)
   if (location == hypre_MEMORY_DEVICE)
   {
      HYPRE_CUDA_CALL( cudaMemPrefetchAsync(ptr, size, hypre_HandleCudaDevice(hypre_handle()),
                                            hypre_HandleCudaComputeStream(hypre_handle())) );
   }
   else if (location == hypre_MEMORY_HOST)
   {
      HYPRE_CUDA_CALL( cudaMemPrefetchAsync(ptr, size, cudaCpuDeviceId,
                                            hypre_HandleCudaComputeStream(hypre_handle())) );
   }
#endif

#if defined(HYPRE_USING_HIP)
   // Not currently implemented for HIP, but leaving place holder
   /*
    *if (location == hypre_MEMORY_DEVICE)
    *{
    *   HYPRE_HIP_CALL( hipMemPrefetchAsync(ptr, size, hypre_HandleCudaDevice(hypre_handle()),
    *                   hypre_HandleCudaComputeStream(hypre_handle())) );
    *}
    *else if (location == hypre_MEMORY_HOST)
    *{
    *   HYPRE_CUDA_CALL( hipMemPrefetchAsync(ptr, size, cudaCpuDeviceId,
    *                    hypre_HandleCudaComputeStream(hypre_handle())) );
    *}
    */
#endif
}
156
157 /*--------------------------------------------------------------------------
158 * Malloc
159 *--------------------------------------------------------------------------*/
160 static inline void *
hypre_HostMalloc(size_t size,HYPRE_Int zeroinit)161 hypre_HostMalloc(size_t size, HYPRE_Int zeroinit)
162 {
163 void *ptr = NULL;
164
165 #if defined(HYPRE_USING_UMPIRE_HOST)
166 hypre_umpire_host_pooled_allocate(&ptr, size);
167 if (zeroinit)
168 {
169 memset(ptr, 0, size);
170 }
171 #else
172 if (zeroinit)
173 {
174 ptr = calloc(size, 1);
175 }
176 else
177 {
178 ptr = malloc(size);
179 }
180 #endif
181
182 return ptr;
183 }
184
/* Allocate 'size' bytes of device memory, optionally zero-initialized.
 * A user-supplied device allocator (if registered in the hypre handle)
 * takes precedence; otherwise the Umpire device pool or the configured
 * backend (OpenMP offload, CUDA pool/cudaMalloc, HIP) is used.
 * Returns NULL on failure. */
static inline void *
hypre_DeviceMalloc(size_t size, HYPRE_Int zeroinit)
{
   void *ptr = NULL;

   if ( hypre_HandleUserDeviceMalloc(hypre_handle()) )
   {
      hypre_HandleUserDeviceMalloc(hypre_handle())(&ptr, size);
   }
   else
   {
#if defined(HYPRE_USING_UMPIRE_DEVICE)
      hypre_umpire_device_pooled_allocate(&ptr, size);
#else

#if defined(HYPRE_USING_DEVICE_OPENMP)
#if defined(HYPRE_DEVICE_OPENMP_ALLOC)
      ptr = omp_target_alloc(size, hypre__offload_device_num);
#else
      /* mapped-data mode: prepend a size_t header recording the allocation
       * size; hypre_DeviceFree reads it back via ((size_t *) ptr)[-1] */
      ptr = malloc(size + sizeof(size_t));
      size_t *sp = (size_t*) ptr;
      sp[0] = size;
      ptr = (void *) (&sp[1]);
      HYPRE_OMPOffload(hypre__offload_device_num, ptr, size, "enter", "alloc");
#endif
#endif

#if defined(HYPRE_USING_CUDA)
#if defined(HYPRE_USING_DEVICE_POOL)
      HYPRE_CUDA_CALL( hypre_CachingMallocDevice(&ptr, size) );
#else
      HYPRE_CUDA_CALL( cudaMalloc(&ptr, size) );
#endif
#endif

#if defined(HYPRE_USING_HIP)
      HYPRE_HIP_CALL( hipMalloc(&ptr, size) );
#endif

#endif /* #if defined(HYPRE_USING_UMPIRE_DEVICE) */
   }

   /* zero-init only if the allocation succeeded */
   if (ptr && zeroinit)
   {
      hypre_DeviceMemset(ptr, 0, size);
   }

   return ptr;
}
234
/* Allocate 'size' bytes of unified (managed) memory, optionally
 * zero-initialized, and prefetch it to the device. Returns NULL on failure. */
static inline void *
hypre_UnifiedMalloc(size_t size, HYPRE_Int zeroinit)
{
   void *ptr = NULL;

#if defined(HYPRE_USING_UMPIRE_UM)
   hypre_umpire_um_pooled_allocate(&ptr, size);
#else

#if defined(HYPRE_USING_DEVICE_OPENMP)
   HYPRE_CUDA_CALL( cudaMallocManaged(&ptr, size, cudaMemAttachGlobal) );
#endif

#if defined(HYPRE_USING_CUDA)
#if defined(HYPRE_USING_DEVICE_POOL)
   HYPRE_CUDA_CALL( hypre_CachingMallocManaged(&ptr, size) );
#else
   HYPRE_CUDA_CALL( cudaMallocManaged(&ptr, size, cudaMemAttachGlobal) );
#endif
#endif

#if defined(HYPRE_USING_HIP)
   HYPRE_HIP_CALL( hipMallocManaged(&ptr, size, hipMemAttachGlobal) );
#endif

#endif /* #if defined(HYPRE_USING_UMPIRE_UM) */

   /* prefetch to device */
   if (ptr)
   {
      hypre_UnifiedMemPrefetch(ptr, size, hypre_MEMORY_DEVICE);
   }

   if (ptr && zeroinit)
   {
      hypre_UnifiedMemset(ptr, 0, size);
   }

   return ptr;
}
275
/* Allocate 'size' bytes of page-locked (pinned) host memory, optionally
 * zero-initialized. Returns NULL on failure. */
static inline void *
hypre_HostPinnedMalloc(size_t size, HYPRE_Int zeroinit)
{
   void *ptr = NULL;

#if defined(HYPRE_USING_UMPIRE_PINNED)
   hypre_umpire_pinned_pooled_allocate(&ptr, size);
#else

#if defined(HYPRE_USING_DEVICE_OPENMP)
   HYPRE_CUDA_CALL( cudaMallocHost(&ptr, size) );
#endif

#if defined(HYPRE_USING_CUDA)
   HYPRE_CUDA_CALL( cudaMallocHost(&ptr, size) );
#endif

#if defined(HYPRE_USING_HIP)
   HYPRE_HIP_CALL( hipHostMalloc(&ptr, size) );
#endif

#endif /* #if defined(HYPRE_USING_UMPIRE_PINNED) */

   if (ptr && zeroinit)
   {
      hypre_HostMemset(ptr, 0, size);
   }

   return ptr;
}
306
307 static inline void *
hypre_MAlloc_core(size_t size,HYPRE_Int zeroinit,hypre_MemoryLocation location)308 hypre_MAlloc_core(size_t size, HYPRE_Int zeroinit, hypre_MemoryLocation location)
309 {
310 if (size == 0)
311 {
312 return NULL;
313 }
314
315 void *ptr = NULL;
316
317 switch (location)
318 {
319 case hypre_MEMORY_HOST :
320 ptr = hypre_HostMalloc(size, zeroinit);
321 break;
322 case hypre_MEMORY_DEVICE :
323 ptr = hypre_DeviceMalloc(size, zeroinit);
324 break;
325 case hypre_MEMORY_UNIFIED :
326 ptr = hypre_UnifiedMalloc(size, zeroinit);
327 break;
328 case hypre_MEMORY_HOST_PINNED :
329 ptr = hypre_HostPinnedMalloc(size, zeroinit);
330 break;
331 default :
332 hypre_WrongMemoryLocation();
333 }
334
335 if (!ptr)
336 {
337 hypre_OutOfMemory(size);
338 hypre_MPI_Abort(hypre_MPI_COMM_WORLD, -1);
339 }
340
341 return ptr;
342 }
343
/* Allocate 'size' bytes at the given *physical* memory location
 * (no zero-initialization); internal variant taking hypre_MemoryLocation. */
void *
_hypre_MAlloc(size_t size, hypre_MemoryLocation location)
{
   return hypre_MAlloc_core(size, 0, location);
}
349
350 /*--------------------------------------------------------------------------
351 * Free
352 *--------------------------------------------------------------------------*/
/* Free host memory previously obtained from hypre_HostMalloc. */
static inline void
hypre_HostFree(void *ptr)
{
#if defined(HYPRE_USING_UMPIRE_HOST)
   hypre_umpire_host_pooled_free(ptr);
#else
   free(ptr);
#endif
}
362
/* Free device memory previously obtained from hypre_DeviceMalloc, using
 * the same mechanism (user free hook, Umpire pool, or backend) that was
 * used for the allocation. */
static inline void
hypre_DeviceFree(void *ptr)
{
   if ( hypre_HandleUserDeviceMfree(hypre_handle()) )
   {
      hypre_HandleUserDeviceMfree(hypre_handle())(ptr);
   }
   else
   {
#if defined(HYPRE_USING_UMPIRE_DEVICE)
      hypre_umpire_device_pooled_free(ptr);
#else

#if defined(HYPRE_USING_DEVICE_OPENMP)
#if defined(HYPRE_DEVICE_OPENMP_ALLOC)
      omp_target_free(ptr, hypre__offload_device_num);
#else
      /* ((size_t *) ptr)[-1] is the allocation size stored in the header
       * written by hypre_DeviceMalloc in mapped-data mode */
      HYPRE_OMPOffload(hypre__offload_device_num, ptr, ((size_t *) ptr)[-1], "exit", "delete");
#endif
#endif

#if defined(HYPRE_USING_CUDA)
#if defined(HYPRE_USING_DEVICE_POOL)
      HYPRE_CUDA_CALL( hypre_CachingFreeDevice(ptr) );
#else
      HYPRE_CUDA_CALL( cudaFree(ptr) );
#endif
#endif

#if defined(HYPRE_USING_HIP)
      HYPRE_HIP_CALL( hipFree(ptr) );
#endif

#endif /* #if defined(HYPRE_USING_UMPIRE_DEVICE) */
   }
}
399
/* Free unified (managed) memory previously obtained from hypre_UnifiedMalloc. */
static inline void
hypre_UnifiedFree(void *ptr)
{
#if defined(HYPRE_USING_UMPIRE_UM)
   hypre_umpire_um_pooled_free(ptr);
#else

#if defined(HYPRE_USING_DEVICE_OPENMP)
   HYPRE_CUDA_CALL( cudaFree(ptr) );
#endif

#if defined(HYPRE_USING_CUDA)
#if defined(HYPRE_USING_DEVICE_POOL)
   HYPRE_CUDA_CALL( hypre_CachingFreeManaged(ptr) );
#else
   HYPRE_CUDA_CALL( cudaFree(ptr) );
#endif
#endif

#if defined(HYPRE_USING_HIP)
   HYPRE_HIP_CALL( hipFree(ptr) );
#endif

#endif /* #if defined(HYPRE_USING_UMPIRE_UM) */
}
425
/* Free pinned host memory previously obtained from hypre_HostPinnedMalloc. */
static inline void
hypre_HostPinnedFree(void *ptr)
{
#if defined(HYPRE_USING_UMPIRE_PINNED)
   hypre_umpire_pinned_pooled_free(ptr);
#else

#if defined(HYPRE_USING_DEVICE_OPENMP)
   HYPRE_CUDA_CALL( cudaFreeHost(ptr) );
#endif

#if defined(HYPRE_USING_CUDA)
   HYPRE_CUDA_CALL( cudaFreeHost(ptr) );
#endif

#if defined(HYPRE_USING_HIP)
   HYPRE_HIP_CALL( hipHostFree(ptr) );
#endif

#endif /* #if defined(HYPRE_USING_UMPIRE_PINNED) */
}
447
448 static inline void
hypre_Free_core(void * ptr,hypre_MemoryLocation location)449 hypre_Free_core(void *ptr, hypre_MemoryLocation location)
450 {
451 if (!ptr)
452 {
453 return;
454 }
455
456 #ifdef HYPRE_DEBUG
457 hypre_MemoryLocation tmp;
458 hypre_GetPointerLocation(ptr, &tmp);
459 /* do not use hypre_assert, which has alloc and free;
460 * will create an endless loop otherwise */
461 assert(location == tmp);
462 #endif
463
464 switch (location)
465 {
466 case hypre_MEMORY_HOST :
467 hypre_HostFree(ptr);
468 break;
469 case hypre_MEMORY_DEVICE :
470 hypre_DeviceFree(ptr);
471 break;
472 case hypre_MEMORY_UNIFIED :
473 hypre_UnifiedFree(ptr);
474 break;
475 case hypre_MEMORY_HOST_PINNED :
476 hypre_HostPinnedFree(ptr);
477 break;
478 default :
479 hypre_WrongMemoryLocation();
480 }
481 }
482
/* Free 'ptr' at the given *physical* memory location; internal variant
 * taking hypre_MemoryLocation. */
void
_hypre_Free(void *ptr, hypre_MemoryLocation location)
{
   hypre_Free_core(ptr, location);
}
488
489
490 /*--------------------------------------------------------------------------
491 * Memcpy
492 *--------------------------------------------------------------------------*/
/* Copy 'size' bytes from 'src' to 'dst', where each pointer may live in
 * host, pinned-host, device, or unified memory. The 4x4 location matrix is
 * collapsed into six cases below; a NULL endpoint with size > 0 is reported
 * as an error, and dst == src is a no-op. */
static inline void
hypre_Memcpy_core(void *dst, void *src, size_t size, hypre_MemoryLocation loc_dst, hypre_MemoryLocation loc_src)
{
   if (dst == NULL || src == NULL)
   {
      if (size)
      {
         hypre_printf("hypre_Memcpy warning: copy %ld bytes from %p to %p !\n", size, src, dst);
         hypre_assert(0);
      }

      return;
   }

   if (dst == src)
   {
      return;
   }

   /* Totally 4 x 4 = 16 cases */

   /* 4: Host <-- Host, Host <-- Pinned,
    *    Pinned <-- Host, Pinned <-- Pinned.
    */
   if ( loc_dst != hypre_MEMORY_DEVICE && loc_dst != hypre_MEMORY_UNIFIED &&
        loc_src != hypre_MEMORY_DEVICE && loc_src != hypre_MEMORY_UNIFIED )
   {
      memcpy(dst, src, size);
      return;
   }


   /* 3: UVM <-- Device, Device <-- UVM, UVM <-- UVM */
   if ( (loc_dst == hypre_MEMORY_UNIFIED && loc_src == hypre_MEMORY_DEVICE)  ||
        (loc_dst == hypre_MEMORY_DEVICE  && loc_src == hypre_MEMORY_UNIFIED) ||
        (loc_dst == hypre_MEMORY_UNIFIED && loc_src == hypre_MEMORY_UNIFIED) )
   {
#if defined(HYPRE_USING_DEVICE_OPENMP)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyDeviceToDevice) );
#endif

#if defined(HYPRE_USING_CUDA)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyDeviceToDevice) );
#endif

#if defined(HYPRE_USING_HIP)
      HYPRE_HIP_CALL( hipMemcpy(dst, src, size, hipMemcpyDeviceToDevice) );
#endif
      return;
   }


   /* 2: UVM <-- Host, UVM <-- Pinned */
   if (loc_dst == hypre_MEMORY_UNIFIED)
   {
#if defined(HYPRE_USING_DEVICE_OPENMP)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice) );
#endif

#if defined(HYPRE_USING_CUDA)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice) );
#endif

#if defined(HYPRE_USING_HIP)
      HYPRE_HIP_CALL( hipMemcpy(dst, src, size, hipMemcpyHostToDevice) );
#endif
      return;
   }


   /* 2: Host <-- UVM, Pinned <-- UVM */
   if (loc_src == hypre_MEMORY_UNIFIED)
   {
#if defined(HYPRE_USING_DEVICE_OPENMP)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost) );
#endif

#if defined(HYPRE_USING_CUDA)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost) );
#endif

#if defined(HYPRE_USING_HIP)
      HYPRE_HIP_CALL( hipMemcpy(dst, src, size, hipMemcpyDeviceToHost) );
#endif
      return;
   }


   /* 2: Device <-- Host, Device <-- Pinned */
   if ( loc_dst == hypre_MEMORY_DEVICE && (loc_src == hypre_MEMORY_HOST || loc_src == hypre_MEMORY_HOST_PINNED) )
   {
#if defined(HYPRE_USING_DEVICE_OPENMP)
#if defined(HYPRE_DEVICE_OPENMP_ALLOC)
      omp_target_memcpy(dst, src, size, 0, 0, hypre__offload_device_num, hypre__offload_host_num);
#else
      /* mapped-data mode: copy on host, then push dst to the device */
      memcpy(dst, src, size);
      HYPRE_OMPOffload(hypre__offload_device_num, dst, size, "update", "to");
#endif
#endif

#if defined(HYPRE_USING_CUDA)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice) );
#endif

#if defined(HYPRE_USING_HIP)
      HYPRE_HIP_CALL( hipMemcpy(dst, src, size, hipMemcpyHostToDevice) );
#endif
      return;
   }


   /* 2: Host <-- Device, Pinned <-- Device */
   if ( (loc_dst == hypre_MEMORY_HOST || loc_dst == hypre_MEMORY_HOST_PINNED) && loc_src == hypre_MEMORY_DEVICE )
   {
#if defined(HYPRE_USING_DEVICE_OPENMP)
#if defined(HYPRE_DEVICE_OPENMP_ALLOC)
      omp_target_memcpy(dst, src, size, 0, 0, hypre__offload_host_num, hypre__offload_device_num);
#else
      /* mapped-data mode: pull src from the device, then copy on host */
      HYPRE_OMPOffload(hypre__offload_device_num, src, size, "update", "from");
      memcpy(dst, src, size);
#endif
#endif

#if defined(HYPRE_USING_CUDA)
      HYPRE_CUDA_CALL( cudaMemcpy( dst, src, size, cudaMemcpyDeviceToHost) );
#endif

#if defined(HYPRE_USING_HIP)
      HYPRE_HIP_CALL( hipMemcpy(dst, src, size, hipMemcpyDeviceToHost) );
#endif
      return;
   }


   /* 1: Device <-- Device */
   if (loc_dst == hypre_MEMORY_DEVICE && loc_src == hypre_MEMORY_DEVICE)
   {
#if defined(HYPRE_USING_DEVICE_OPENMP)
#if defined(HYPRE_DEVICE_OPENMP_ALLOC)
      omp_target_memcpy(dst, src, size, 0, 0, hypre__offload_device_num, hypre__offload_device_num);
#else
      /* mapped-data mode: stage through the host copies */
      HYPRE_OMPOffload(hypre__offload_device_num, src, size, "update", "from");
      memcpy(dst, src, size);
      HYPRE_OMPOffload(hypre__offload_device_num, dst, size, "update", "to");
#endif
#endif

#if defined(HYPRE_USING_CUDA)
      HYPRE_CUDA_CALL( cudaMemcpy(dst, src, size, cudaMemcpyDeviceToDevice) );
#endif

#if defined(HYPRE_USING_HIP)
      HYPRE_HIP_CALL( hipMemcpy(dst, src, size, hipMemcpyDeviceToDevice) );
#endif
      return;
   }

   /* any remaining combination is invalid */
   hypre_WrongMemoryLocation();
}
652
653 /*--------------------------------------------------------------------------*
654 * ExecPolicy
655 *--------------------------------------------------------------------------*/
/* Map one physical memory location to an execution policy:
 * host/pinned -> host execution, device -> device execution,
 * unified -> the handle's default policy (GPU builds only). */
static inline HYPRE_ExecutionPolicy
hypre_GetExecPolicy1_core(hypre_MemoryLocation location)
{
   HYPRE_ExecutionPolicy exec = HYPRE_EXEC_UNDEFINED;

   switch (location)
   {
      case hypre_MEMORY_HOST :
      case hypre_MEMORY_HOST_PINNED :
         exec = HYPRE_EXEC_HOST;
         break;
      case hypre_MEMORY_DEVICE :
         exec = HYPRE_EXEC_DEVICE;
         break;
      case hypre_MEMORY_UNIFIED :
#if defined(HYPRE_USING_GPU)
         exec = hypre_HandleDefaultExecPolicy(hypre_handle());
#endif
         break;
      default :
         hypre_WrongMemoryLocation();
   }

   hypre_assert(exec != HYPRE_EXEC_UNDEFINED);

   return exec;
}
683
684 /* for binary operation */
/* for binary operation */
/* Map a pair of physical memory locations to one execution policy.
 * Pinned host memory is treated as host. The checks below are ordered so
 * that LATER tests take precedence: unified+unified uses the default
 * policy, any host operand forces host execution, and any device operand
 * forces device execution (overriding the host rule). Host+device mixes
 * remain undefined and trip the assert. */
static inline HYPRE_ExecutionPolicy
hypre_GetExecPolicy2_core(hypre_MemoryLocation location1,
                          hypre_MemoryLocation location2)
{
   HYPRE_ExecutionPolicy exec = HYPRE_EXEC_UNDEFINED;

   /* HOST_PINNED has the same exec policy as HOST */
   if (location1 == hypre_MEMORY_HOST_PINNED)
   {
      location1 = hypre_MEMORY_HOST;
   }

   if (location2 == hypre_MEMORY_HOST_PINNED)
   {
      location2 = hypre_MEMORY_HOST;
   }

   /* no policy for these combinations */
   if ( (location1 == hypre_MEMORY_HOST && location2 == hypre_MEMORY_DEVICE) ||
        (location2 == hypre_MEMORY_HOST && location1 == hypre_MEMORY_DEVICE) )
   {
      exec = HYPRE_EXEC_UNDEFINED;
   }

   /* this should never happen */
   if ( (location1 == hypre_MEMORY_UNIFIED && location2 == hypre_MEMORY_DEVICE) ||
        (location2 == hypre_MEMORY_UNIFIED && location1 == hypre_MEMORY_DEVICE) )
   {
      exec = HYPRE_EXEC_UNDEFINED;
   }

   if (location1 == hypre_MEMORY_UNIFIED && location2 == hypre_MEMORY_UNIFIED)
   {
#if defined(HYPRE_USING_GPU)
      exec = hypre_HandleDefaultExecPolicy(hypre_handle());
#endif
   }

   if (location1 == hypre_MEMORY_HOST || location2 == hypre_MEMORY_HOST)
   {
      exec = HYPRE_EXEC_HOST;
   }

   if (location1 == hypre_MEMORY_DEVICE || location2 == hypre_MEMORY_DEVICE)
   {
      exec = HYPRE_EXEC_DEVICE;
   }

   hypre_assert(exec != HYPRE_EXEC_UNDEFINED);

   return exec;
}
737
738 /*==========================================================================
739 * Conceptual memory location (HYPRE_MemoryLocation) interface
740 *==========================================================================*/
741
742 /*--------------------------------------------------------------------------
743 * hypre_Memset
744 * "Sets the first num bytes of the block of memory pointed by ptr to the specified value
745 * (*** value is interpreted as an unsigned char ***)"
746 * http://www.cplusplus.com/reference/cstring/memset/
747 *--------------------------------------------------------------------------*/
748 void *
hypre_Memset(void * ptr,HYPRE_Int value,size_t num,HYPRE_MemoryLocation location)749 hypre_Memset(void *ptr, HYPRE_Int value, size_t num, HYPRE_MemoryLocation location)
750 {
751 if (num == 0)
752 {
753 return ptr;
754 }
755
756 if (ptr == NULL)
757 {
758 if (num)
759 {
760 hypre_printf("hypre_Memset warning: set values for %ld bytes at %p !\n", num, ptr);
761 }
762 return ptr;
763 }
764
765 switch (hypre_GetActualMemLocation(location))
766 {
767 case hypre_MEMORY_HOST :
768 case hypre_MEMORY_HOST_PINNED :
769 hypre_HostMemset(ptr, value, num);
770 break;
771 case hypre_MEMORY_DEVICE :
772 hypre_DeviceMemset(ptr, value, num);
773 break;
774 case hypre_MEMORY_UNIFIED :
775 hypre_UnifiedMemset(ptr, value, num);
776 break;
777 default :
778 hypre_WrongMemoryLocation();
779 }
780
781 return ptr;
782 }
783
784 /*--------------------------------------------------------------------------
785 * Memprefetch
786 *--------------------------------------------------------------------------*/
/* Prefetch 'size' bytes at 'ptr' toward the actual location corresponding
 * to the conceptual 'location'. Forwards to hypre_UnifiedMemPrefetch, which
 * (under HYPRE_DEBUG) asserts that 'ptr' is unified memory. */
void
hypre_MemPrefetch(void *ptr, size_t size, HYPRE_MemoryLocation location)
{
   hypre_UnifiedMemPrefetch( ptr, size, hypre_GetActualMemLocation(location) );
}
792
793 /*--------------------------------------------------------------------------*
794 * hypre_MAlloc, hypre_CAlloc
795 *--------------------------------------------------------------------------*/
796
/* Allocate 'size' bytes at the conceptual memory location
 * (no zero-initialization). Aborts on allocation failure. */
void *
hypre_MAlloc(size_t size, HYPRE_MemoryLocation location)
{
   return hypre_MAlloc_core(size, 0, hypre_GetActualMemLocation(location));
}
802
803 void *
hypre_CAlloc(size_t count,size_t elt_size,HYPRE_MemoryLocation location)804 hypre_CAlloc( size_t count, size_t elt_size, HYPRE_MemoryLocation location)
805 {
806 return hypre_MAlloc_core(count * elt_size, 1, hypre_GetActualMemLocation(location));
807 }
808
809 /*--------------------------------------------------------------------------
810 * hypre_Free
811 *--------------------------------------------------------------------------*/
812
/* Free 'ptr' previously allocated at the conceptual memory location.
 * Freeing NULL is a no-op. */
void
hypre_Free(void *ptr, HYPRE_MemoryLocation location)
{
   hypre_Free_core(ptr, hypre_GetActualMemLocation(location));
}
818
819 /*--------------------------------------------------------------------------
820 * hypre_Memcpy
821 *--------------------------------------------------------------------------*/
822
/* Copy 'size' bytes from 'src' (at loc_src) to 'dst' (at loc_dst), where
 * the locations are conceptual HYPRE_MemoryLocation values. */
void
hypre_Memcpy(void *dst, void *src, size_t size, HYPRE_MemoryLocation loc_dst, HYPRE_MemoryLocation loc_src)
{
   hypre_Memcpy_core( dst, src, size, hypre_GetActualMemLocation(loc_dst), hypre_GetActualMemLocation(loc_src) );
}
828
829 /*--------------------------------------------------------------------------
830 * hypre_ReAlloc
831 *--------------------------------------------------------------------------*/
/* Resize a HOST allocation to 'size' bytes. size == 0 frees and returns
 * NULL; ptr == NULL behaves like hypre_MAlloc. Non-host locations are a
 * fatal error (use hypre_ReAlloc_v2 instead, which needs the old size).
 * Returns the (possibly moved) pointer, or NULL on failure. */
void *
hypre_ReAlloc(void *ptr, size_t size, HYPRE_MemoryLocation location)
{
   if (size == 0)
   {
      hypre_Free(ptr, location);
      return NULL;
   }

   if (ptr == NULL)
   {
      return hypre_MAlloc(size, location);
   }

   if (hypre_GetActualMemLocation(location) != hypre_MEMORY_HOST)
   {
      hypre_printf("hypre_TReAlloc only works with HYPRE_MEMORY_HOST; Use hypre_TReAlloc_v2 instead!\n");
      hypre_assert(0);
      hypre_MPI_Abort(hypre_MPI_COMM_WORLD, -1);
      return NULL;
   }

   /* NOTE(review): 'ptr' is overwritten by the realloc result, so on
    * failure the original block is no longer reachable from here; callers
    * receive NULL and cannot free the old block either — confirm this
    * leak-on-failure behavior is intended. */
#if defined(HYPRE_USING_UMPIRE_HOST)
   ptr = hypre_umpire_host_pooled_realloc(ptr, size);
#else
   ptr = realloc(ptr, size);
#endif

   if (!ptr)
   {
      hypre_OutOfMemory(size);
   }

   return ptr;
}
867
868 void *
hypre_ReAlloc_v2(void * ptr,size_t old_size,size_t new_size,HYPRE_MemoryLocation location)869 hypre_ReAlloc_v2(void *ptr, size_t old_size, size_t new_size, HYPRE_MemoryLocation location)
870 {
871 if (new_size == 0)
872 {
873 hypre_Free(ptr, location);
874 return NULL;
875 }
876
877 if (ptr == NULL)
878 {
879 return hypre_MAlloc(new_size, location);
880 }
881
882 void *new_ptr = hypre_MAlloc(new_size, location);
883 size_t smaller_size = new_size > old_size ? old_size : new_size;
884 hypre_Memcpy(new_ptr, ptr, smaller_size, location, location);
885 hypre_Free(ptr, location);
886 ptr = new_ptr;
887
888 if (!ptr)
889 {
890 hypre_OutOfMemory(new_size);
891 }
892
893 return ptr;
894 }
895
896 /*--------------------------------------------------------------------------*
897 * hypre_GetExecPolicy: return execution policy based on memory locations
898 *--------------------------------------------------------------------------*/
899 /* for unary operation */
/* Return the execution policy implied by one conceptual memory location
 * (for unary operations). */
HYPRE_ExecutionPolicy
hypre_GetExecPolicy1(HYPRE_MemoryLocation location)
{

   return hypre_GetExecPolicy1_core(hypre_GetActualMemLocation(location));
}
906
907 /* for binary operation */
/* for binary operation */
/* Return the execution policy implied by a pair of conceptual memory
 * locations (for binary operations). */
HYPRE_ExecutionPolicy
hypre_GetExecPolicy2(HYPRE_MemoryLocation location1,
                     HYPRE_MemoryLocation location2)
{
   return hypre_GetExecPolicy2_core(hypre_GetActualMemLocation(location1),
                                    hypre_GetActualMemLocation(location2));
}
915
916 /*--------------------------------------------------------------------------
917 * Query the actual memory location pointed by ptr
918 *--------------------------------------------------------------------------*/
/* Query the physical memory location of 'ptr' via the runtime's pointer
 * attributes (CUDA or HIP). On non-GPU builds every pointer is reported as
 * host memory. Returns 0 on success, 1 if the attribute query failed (in
 * which case *memory_location may be left hypre_MEMORY_UNDEFINED, or set
 * to host for the "unregistered pointer" error case). */
HYPRE_Int
hypre_GetPointerLocation(const void *ptr, hypre_MemoryLocation *memory_location)
{
   HYPRE_Int ierr = 0;

#if defined(HYPRE_USING_GPU)
   *memory_location = hypre_MEMORY_UNDEFINED;

#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_DEVICE_OPENMP)
   struct cudaPointerAttributes attr;

#if (CUDART_VERSION >= 10000)
#if (CUDART_VERSION >= 11000)
   /* CUDA >= 11: querying an unregistered host pointer is not an error */
   HYPRE_CUDA_CALL( cudaPointerGetAttributes(&attr, ptr) );
#else
   cudaError_t err = cudaPointerGetAttributes(&attr, ptr);
   if (err != cudaSuccess)
   {
      ierr = 1;
      /* clear the error */
      cudaGetLastError();
   }
#endif
   /* CUDA >= 10 exposes the unified 'type' field */
   if (attr.type == cudaMemoryTypeUnregistered)
   {
      *memory_location = hypre_MEMORY_HOST;
   }
   else if (attr.type == cudaMemoryTypeHost)
   {
      *memory_location = hypre_MEMORY_HOST_PINNED;
   }
   else if (attr.type == cudaMemoryTypeDevice)
   {
      *memory_location = hypre_MEMORY_DEVICE;
   }
   else if (attr.type == cudaMemoryTypeManaged)
   {
      *memory_location = hypre_MEMORY_UNIFIED;
   }
#else
   /* CUDA < 10: use the older 'isManaged'/'memoryType' fields; an
    * unregistered host pointer makes the query fail with InvalidValue */
   cudaError_t err = cudaPointerGetAttributes(&attr, ptr);
   if (err != cudaSuccess)
   {
      ierr = 1;

      /* clear the error */
      cudaGetLastError();

      if (err == cudaErrorInvalidValue)
      {
         *memory_location = hypre_MEMORY_HOST;
      }
   }
   else if (attr.isManaged)
   {
      *memory_location = hypre_MEMORY_UNIFIED;
   }
   else if (attr.memoryType == cudaMemoryTypeDevice)
   {
      *memory_location = hypre_MEMORY_DEVICE;
   }
   else if (attr.memoryType == cudaMemoryTypeHost)
   {
      *memory_location = hypre_MEMORY_HOST_PINNED;
   }
#endif // CUDART_VERSION >= 10000
#endif // defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_DEVICE_OPENMP)

#if defined(HYPRE_USING_HIP)

   struct hipPointerAttribute_t attr;
   *memory_location = hypre_MEMORY_UNDEFINED;

   hipError_t err = hipPointerGetAttributes(&attr, ptr);
   if (err != hipSuccess)
   {
      ierr = 1;

      /* clear the error */
      hipGetLastError();

      if (err == hipErrorInvalidValue)
      {
         *memory_location = hypre_MEMORY_HOST;
      }
   }
   else if (attr.isManaged)
   {
      *memory_location = hypre_MEMORY_UNIFIED;
   }
   else if (attr.memoryType == hipMemoryTypeDevice)
   {
      *memory_location = hypre_MEMORY_DEVICE;
   }
   else if (attr.memoryType == hipMemoryTypeHost)
   {
      *memory_location = hypre_MEMORY_HOST_PINNED;
   }
#endif // defined(HYPRE_USING_HIP)

#else /* #if defined(HYPRE_USING_GPU) */
   *memory_location = hypre_MEMORY_HOST;
#endif

   return ierr;
}
1025
1026 #ifdef HYPRE_USING_MEMORY_TRACKER
1027
1028 /*--------------------------------------------------------------------------
1029 * Memory tracker
1030 * do not use hypre_T* in the following since we don't want to track them *
1031 *--------------------------------------------------------------------------*/
/* Allocate a zero-initialized memory tracker. Uses plain calloc (not
 * hypre_CAlloc) so the tracker itself is not tracked. */
hypre_MemoryTracker *
hypre_MemoryTrackerCreate()
{
   hypre_MemoryTracker *ptr = (hypre_MemoryTracker *) calloc(1, sizeof(hypre_MemoryTracker));
   return ptr;
}
1038
/* Release a memory tracker and its entry array. NULL is a no-op. */
void
hypre_MemoryTrackerDestroy(hypre_MemoryTracker *tracker)
{
   if (tracker)
   {
      free(tracker->data);
      free(tracker);
   }
}
1048
/* Record one alloc/free event in the global memory tracker. A NULL 'ptr'
 * is ignored. The entry array grows geometrically (2n+1) on demand via
 * plain realloc, so the tracker's own bookkeeping is not tracked. */
void
hypre_MemoryTrackerInsert(const char           *action,
                          void                 *ptr,
                          size_t                nbytes,
                          hypre_MemoryLocation  memory_location,
                          const char           *filename,
                          const char           *function,
                          HYPRE_Int             line)
{

   if (ptr == NULL)
   {
      return;
   }

   hypre_MemoryTracker *tracker = hypre_memory_tracker();

   if (tracker->alloced_size <= tracker->actual_size)
   {
      /* NOTE(review): realloc result overwrites tracker->data directly; on
       * failure the whole event history leaks and the NULL return is
       * dereferenced below — confirm OOM here is considered unrecoverable */
      tracker->alloced_size = 2 * tracker->alloced_size + 1;
      tracker->data = (hypre_MemoryTrackerEntry *) realloc(tracker->data, tracker->alloced_size * sizeof(hypre_MemoryTrackerEntry));
   }

   hypre_assert(tracker->actual_size < tracker->alloced_size);

   hypre_MemoryTrackerEntry *entry = tracker->data + tracker->actual_size;

   /* NOTE(review): sprintf into the entry's fixed-size buffers is
    * unbounded; assumes action/filename/function always fit — verify
    * against the _action/_filename/_function field sizes in the struct */
   sprintf(entry->_action, "%s", action);
   entry->_ptr = ptr;
   entry->_nbytes = nbytes;
   entry->_memory_location = memory_location;
   sprintf(entry->_filename, "%s", filename);
   sprintf(entry->_function, "%s", function);
   entry->_line = line;
   /* -1 is the initial value */
   entry->_pair = (size_t) -1;

   tracker->actual_size ++;
}
1088
1089
/* Dump all tracked alloc/free events to a per-rank log file and report
 * leaks / unpaired frees.
 * do not use hypre_printf, hypre_fprintf, which have TAlloc
 * endless loop "for (i = 0; i < tracker->actual_size; i++)" otherwise */
HYPRE_Int
hypre_PrintMemoryTracker()
{
   HYPRE_Int myid, ierr = 0;
   char filename[256];
   FILE *file;
   size_t i, j;

   hypre_MemoryTracker *tracker = hypre_memory_tracker();

   /* one log file per MPI rank, opened in append mode so repeated calls
    * extend the same file */
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid);
   hypre_sprintf(filename,"HypreMemoryTrack.log.%05d", myid);
   if ((file = fopen(filename, "a")) == NULL)
   {
      fprintf(stderr, "Error: can't open output file %s\n", filename);
      return hypre_error_flag;
   }

   fprintf(file, "==== Operations:\n");
   fprintf(file, " ID EVENT ADDRESS BYTE LOCATION FILE(LINE) FUNCTION | Memory ( H P D U )\n");

   /* running byte counters indexed by hypre_MemoryLocation:
    * total ever allocated, peak resident, currently resident */
   size_t totl_bytes[hypre_MEMORY_UNIFIED+1] = {0};
   size_t peak_bytes[hypre_MEMORY_UNIFIED+1] = {0};
   size_t curr_bytes[hypre_MEMORY_UNIFIED+1] = {0};

   for (i = 0; i < tracker->actual_size; i++)
   {
      if (strstr(tracker->data[i]._action, "alloc") != NULL)
      {
         totl_bytes[tracker->data[i]._memory_location] += tracker->data[i]._nbytes;
         curr_bytes[tracker->data[i]._memory_location] += tracker->data[i]._nbytes;
         peak_bytes[tracker->data[i]._memory_location] =
            hypre_max( curr_bytes[tracker->data[i]._memory_location],
                       peak_bytes[tracker->data[i]._memory_location] );

         /* for each unpaired "alloc", find its "free" */
         if (tracker->data[i]._pair != (size_t) -1)
         {
            /* already paired on a previous call: verify the back-link */
            if ( tracker->data[i]._pair >= tracker->actual_size ||
                 tracker->data[tracker->data[i]._pair]._pair != i)
            {
               fprintf(stderr, "hypre memory tracker internal error!\n");
               hypre_MPI_Abort(hypre_MPI_COMM_WORLD, 1);
            }

            continue;
         }

         /* linear forward scan for an unpaired "free" of the same pointer
          * in the same memory location */
         for (j = i+1; j < tracker->actual_size; j++)
         {
            if ( strstr(tracker->data[j]._action, "free") != NULL &&
                 tracker->data[j]._pair == (size_t) -1 &&
                 tracker->data[i]._ptr == tracker->data[j]._ptr &&
                 tracker->data[i]._memory_location == tracker->data[j]._memory_location )
            {
               tracker->data[i]._pair = j;
               tracker->data[j]._pair = i;
               /* free events carry no size; copy it from the alloc */
               tracker->data[j]._nbytes = tracker->data[i]._nbytes;
               break;
            }
         }

         if (tracker->data[i]._pair == (size_t) -1)
         {
            fprintf(stderr, "%6zu: %16p may not freed\n", i, tracker->data[i]._ptr );
         }
      }
      else if (strstr(tracker->data[i]._action, "free") != NULL)
      {
         size_t pair = tracker->data[i]._pair;

         if (pair == (size_t) -1)
         {
            fprintf(stderr, "%6zu: unpaired free at %16p\n", i, tracker->data[i]._ptr );
         }
         else
         {
            /* size comes from the paired alloc entry */
            curr_bytes[tracker->data[i]._memory_location] -= tracker->data[pair]._nbytes;
         }
      }

      /* entries below prev_end were already printed by an earlier call;
       * they are still accumulated above so the counters stay correct */
      if (i < tracker->prev_end)
      {
         continue;
      }

      char memory_location[256];
      char nbytes[32];

      if (tracker->data[i]._memory_location == hypre_MEMORY_HOST)
      {
         sprintf(memory_location, "%s", "HOST");
      }
      else if (tracker->data[i]._memory_location == hypre_MEMORY_HOST_PINNED)
      {
         sprintf(memory_location, "%s", "HOST_PINNED");
      }
      else if (tracker->data[i]._memory_location == hypre_MEMORY_DEVICE)
      {
         sprintf(memory_location, "%s", "DEVICE");
      }
      else if (tracker->data[i]._memory_location == hypre_MEMORY_UNIFIED)
      {
         sprintf(memory_location, "%s", "UNIFIED");
      }
      else
      {
         sprintf(memory_location, "%s", "UNDEFINED");
      }

      /* (size_t)-1 means "size unknown" -> print an empty field */
      if (tracker->data[i]._nbytes != (size_t) -1)
      {
         sprintf(nbytes, "%zu", tracker->data[i]._nbytes);
      }
      else
      {
         sprintf(nbytes, "%s", "");
      }

      fprintf(file, " %6zu %12s %16p %10s %16s %40s (%5d) %50s | %12zu %12zu %12zu %12zu\n",
              i,
              tracker->data[i]._action,
              tracker->data[i]._ptr,
              nbytes,
              memory_location,
              tracker->data[i]._filename,
              tracker->data[i]._line,
              tracker->data[i]._function,
              curr_bytes[hypre_MEMORY_HOST],
              curr_bytes[hypre_MEMORY_HOST_PINNED],
              curr_bytes[hypre_MEMORY_DEVICE],
              curr_bytes[hypre_MEMORY_UNIFIED]
              );
   }

   fprintf(file, "\n==== Total allocated (byte):\n");
   fprintf(file, "HOST: %16zu, HOST_PINNED %16zu, DEVICE %16zu, UNIFIED %16zu\n",
           totl_bytes[hypre_MEMORY_HOST],
           totl_bytes[hypre_MEMORY_HOST_PINNED],
           totl_bytes[hypre_MEMORY_DEVICE],
           totl_bytes[hypre_MEMORY_UNIFIED]);

   fprintf(file, "\n==== Peak (byte):\n");
   fprintf(file, "HOST: %16zu, HOST_PINNED %16zu, DEVICE %16zu, UNIFIED %16zu\n",
           peak_bytes[hypre_MEMORY_HOST],
           peak_bytes[hypre_MEMORY_HOST_PINNED],
           peak_bytes[hypre_MEMORY_DEVICE],
           peak_bytes[hypre_MEMORY_UNIFIED]);

   fprintf(file, "\n==== Reachable (byte):\n");
   fprintf(file, "HOST: %16zu, HOST_PINNED %16zu, DEVICE %16zu, UNIFIED %16zu\n",
           curr_bytes[hypre_MEMORY_HOST],
           curr_bytes[hypre_MEMORY_HOST_PINNED],
           curr_bytes[hypre_MEMORY_DEVICE],
           curr_bytes[hypre_MEMORY_UNIFIED]);

   /* second pass: anything still unpaired is a leak or an unmatched free */
   fprintf(file, "\n==== Warnings:\n");
   for (i = 0; i < tracker->actual_size; i++)
   {
      if (tracker->data[i]._pair == (size_t) -1)
      {
         if (strstr(tracker->data[i]._action, "alloc") != NULL)
         {
            fprintf(file, "%6zu: %p may have not been freed\n", i, tracker->data[i]._ptr );
         }
         else if (strstr(tracker->data[i]._action, "free") != NULL)
         {
            fprintf(file, "%6zu: unpaired free at %16p\n", i, tracker->data[i]._ptr );
         }
      }
   }

   fclose(file);

   /* remember how far we printed, so the next call appends only new events */
   tracker->prev_end = tracker->actual_size;

   return ierr;
}
1270 #endif
1271
1272 /*--------------------------------------------------------------------------*
1273 * Memory Pool
1274 *--------------------------------------------------------------------------*/
1275
1276 HYPRE_Int
hypre_SetCubMemPoolSize(hypre_uint cub_bin_growth,hypre_uint cub_min_bin,hypre_uint cub_max_bin,size_t cub_max_cached_bytes)1277 hypre_SetCubMemPoolSize(hypre_uint cub_bin_growth,
1278 hypre_uint cub_min_bin,
1279 hypre_uint cub_max_bin,
1280 size_t cub_max_cached_bytes)
1281 {
1282 #if defined(HYPRE_USING_CUDA)
1283 #ifdef HYPRE_USING_DEVICE_POOL
1284 hypre_HandleCubBinGrowth(hypre_handle()) = cub_bin_growth;
1285 hypre_HandleCubMinBin(hypre_handle()) = cub_min_bin;
1286 hypre_HandleCubMaxBin(hypre_handle()) = cub_max_bin;
1287 hypre_HandleCubMaxCachedBytes(hypre_handle()) = cub_max_cached_bytes;
1288
1289 //TODO XXX RL: cub_min_bin, cub_max_bin are not (re)set
1290 if (hypre_HandleCubDevAllocator(hypre_handle()))
1291 {
1292 hypre_HandleCubDevAllocator(hypre_handle()) -> SetMaxCachedBytes(cub_max_cached_bytes);
1293 }
1294
1295 if (hypre_HandleCubUvmAllocator(hypre_handle()))
1296 {
1297 hypre_HandleCubUvmAllocator(hypre_handle()) -> SetMaxCachedBytes(cub_max_cached_bytes);
1298 }
1299 #endif
1300 #endif
1301
1302 return hypre_error_flag;
1303 }
1304
/* Public API wrapper: forwards to hypre_SetCubMemPoolSize to configure the
 * GPU caching memory pool (bin growth factor, min/max bin, cached-byte cap).
 * Returns hypre_error_flag via the callee. */
HYPRE_Int
HYPRE_SetGPUMemoryPoolSize(HYPRE_Int bin_growth,
                           HYPRE_Int min_bin,
                           HYPRE_Int max_bin,
                           size_t    max_cached_bytes)
{
   return hypre_SetCubMemPoolSize(bin_growth, min_bin, max_bin, max_cached_bytes);
}
1313
1314 #ifdef HYPRE_USING_DEVICE_POOL
1315 cudaError_t
hypre_CachingMallocDevice(void ** ptr,size_t nbytes)1316 hypre_CachingMallocDevice(void **ptr, size_t nbytes)
1317 {
1318 if (!hypre_HandleCubDevAllocator(hypre_handle()))
1319 {
1320 hypre_HandleCubDevAllocator(hypre_handle()) =
1321 hypre_CudaDataCubCachingAllocatorCreate( hypre_HandleCubBinGrowth(hypre_handle()),
1322 hypre_HandleCubMinBin(hypre_handle()),
1323 hypre_HandleCubMaxBin(hypre_handle()),
1324 hypre_HandleCubMaxCachedBytes(hypre_handle()),
1325 false,
1326 false,
1327 false );
1328 }
1329
1330 return hypre_HandleCubDevAllocator(hypre_handle()) -> DeviceAllocate(ptr, nbytes);
1331 }
1332
1333 cudaError_t
hypre_CachingFreeDevice(void * ptr)1334 hypre_CachingFreeDevice(void *ptr)
1335 {
1336 return hypre_HandleCubDevAllocator(hypre_handle()) -> DeviceFree(ptr);
1337 }
1338
1339 cudaError_t
hypre_CachingMallocManaged(void ** ptr,size_t nbytes)1340 hypre_CachingMallocManaged(void **ptr, size_t nbytes)
1341 {
1342 if (!hypre_HandleCubUvmAllocator(hypre_handle()))
1343 {
1344 hypre_HandleCubUvmAllocator(hypre_handle()) =
1345 hypre_CudaDataCubCachingAllocatorCreate( hypre_HandleCubBinGrowth(hypre_handle()),
1346 hypre_HandleCubMinBin(hypre_handle()),
1347 hypre_HandleCubMaxBin(hypre_handle()),
1348 hypre_HandleCubMaxCachedBytes(hypre_handle()),
1349 false,
1350 false,
1351 true );
1352 }
1353
1354 return hypre_HandleCubUvmAllocator(hypre_handle()) -> DeviceAllocate(ptr, nbytes);
1355 }
1356
1357 cudaError_t
hypre_CachingFreeManaged(void * ptr)1358 hypre_CachingFreeManaged(void *ptr)
1359 {
1360 return hypre_HandleCubUvmAllocator(hypre_handle()) -> DeviceFree(ptr);
1361 }
1362
1363 hypre_cub_CachingDeviceAllocator *
hypre_CudaDataCubCachingAllocatorCreate(hypre_uint bin_growth,hypre_uint min_bin,hypre_uint max_bin,size_t max_cached_bytes,bool skip_cleanup,bool debug,bool use_managed_memory)1364 hypre_CudaDataCubCachingAllocatorCreate(hypre_uint bin_growth,
1365 hypre_uint min_bin,
1366 hypre_uint max_bin,
1367 size_t max_cached_bytes,
1368 bool skip_cleanup,
1369 bool debug,
1370 bool use_managed_memory)
1371 {
1372 hypre_cub_CachingDeviceAllocator *allocator =
1373 new hypre_cub_CachingDeviceAllocator( bin_growth,
1374 min_bin,
1375 max_bin,
1376 max_cached_bytes,
1377 skip_cleanup,
1378 debug,
1379 use_managed_memory );
1380
1381 return allocator;
1382 }
1383
1384 void
hypre_CudaDataCubCachingAllocatorDestroy(hypre_CudaData * data)1385 hypre_CudaDataCubCachingAllocatorDestroy(hypre_CudaData *data)
1386 {
1387 delete hypre_CudaDataCubDevAllocator(data);
1388 delete hypre_CudaDataCubUvmAllocator(data);
1389 }
1390
1391 #endif // #ifdef HYPRE_USING_DEVICE_POOL
1392
1393 #if defined(HYPRE_USING_UMPIRE_HOST)
1394 HYPRE_Int
hypre_umpire_host_pooled_allocate(void ** ptr,size_t nbytes)1395 hypre_umpire_host_pooled_allocate(void **ptr, size_t nbytes)
1396 {
1397 hypre_Handle *handle = hypre_handle();
1398 const char *resource_name = "HOST";
1399 const char *pool_name = hypre_HandleUmpireHostPoolName(handle);
1400
1401 umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);
1402 umpire_allocator pooled_allocator;
1403
1404 if ( umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name) )
1405 {
1406 umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
1407 }
1408 else
1409 {
1410 umpire_allocator allocator;
1411 umpire_resourcemanager_get_allocator_by_name(rm_ptr, resource_name, &allocator);
1412 umpire_resourcemanager_make_allocator_pool(rm_ptr, pool_name, allocator,
1413 hypre_HandleUmpireHostPoolSize(handle),
1414 hypre_HandleUmpireBlockSize(handle), &pooled_allocator);
1415 hypre_HandleOwnUmpireHostPool(handle) = 1;
1416 }
1417
1418 *ptr = umpire_allocator_allocate(&pooled_allocator, nbytes);
1419
1420 return hypre_error_flag;
1421 }
1422
1423 HYPRE_Int
hypre_umpire_host_pooled_free(void * ptr)1424 hypre_umpire_host_pooled_free(void *ptr)
1425 {
1426 hypre_Handle *handle = hypre_handle();
1427 const char *pool_name = hypre_HandleUmpireHostPoolName(handle);
1428 umpire_allocator pooled_allocator;
1429
1430 umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);
1431
1432 hypre_assert(umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name));
1433
1434 umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
1435 umpire_allocator_deallocate(&pooled_allocator, ptr);
1436
1437 return hypre_error_flag;
1438 }
1439
1440 void *
hypre_umpire_host_pooled_realloc(void * ptr,size_t size)1441 hypre_umpire_host_pooled_realloc(void *ptr, size_t size)
1442 {
1443 hypre_Handle *handle = hypre_handle();
1444 const char *pool_name = hypre_HandleUmpireHostPoolName(handle);
1445 umpire_allocator pooled_allocator;
1446
1447 umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);
1448
1449 hypre_assert(umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name));
1450
1451 umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
1452 ptr = umpire_resourcemanager_reallocate_with_allocator(rm_ptr, ptr, size, pooled_allocator);
1453
1454 return ptr;
1455 }
1456 #endif
1457
1458 #if defined(HYPRE_USING_UMPIRE_DEVICE)
1459 HYPRE_Int
hypre_umpire_device_pooled_allocate(void ** ptr,size_t nbytes)1460 hypre_umpire_device_pooled_allocate(void **ptr, size_t nbytes)
1461 {
1462 hypre_Handle *handle = hypre_handle();
1463 const hypre_int device_id = hypre_HandleCudaDevice(handle);
1464 char resource_name[16];
1465 const char *pool_name = hypre_HandleUmpireDevicePoolName(handle);
1466
1467 hypre_sprintf(resource_name, "%s::%d", "DEVICE", device_id);
1468
1469 umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);
1470 umpire_allocator pooled_allocator;
1471
1472 if ( umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name) )
1473 {
1474 umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
1475 }
1476 else
1477 {
1478 umpire_allocator allocator;
1479 umpire_resourcemanager_get_allocator_by_name(rm_ptr, resource_name, &allocator);
1480 umpire_resourcemanager_make_allocator_pool(rm_ptr, pool_name, allocator,
1481 hypre_HandleUmpireDevicePoolSize(handle),
1482 hypre_HandleUmpireBlockSize(handle), &pooled_allocator);
1483
1484 hypre_HandleOwnUmpireDevicePool(handle) = 1;
1485 }
1486
1487 *ptr = umpire_allocator_allocate(&pooled_allocator, nbytes);
1488
1489 return hypre_error_flag;
1490 }
1491
1492 HYPRE_Int
hypre_umpire_device_pooled_free(void * ptr)1493 hypre_umpire_device_pooled_free(void *ptr)
1494 {
1495 hypre_Handle *handle = hypre_handle();
1496 const char *pool_name = hypre_HandleUmpireDevicePoolName(handle);
1497 umpire_allocator pooled_allocator;
1498
1499 umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);
1500
1501 hypre_assert(umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name));
1502
1503 umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
1504 umpire_allocator_deallocate(&pooled_allocator, ptr);
1505
1506 return hypre_error_flag;
1507 }
1508 #endif
1509
1510 #if defined(HYPRE_USING_UMPIRE_UM)
1511 HYPRE_Int
hypre_umpire_um_pooled_allocate(void ** ptr,size_t nbytes)1512 hypre_umpire_um_pooled_allocate(void **ptr, size_t nbytes)
1513 {
1514 hypre_Handle *handle = hypre_handle();
1515 const char *resource_name = "UM";
1516 const char *pool_name = hypre_HandleUmpireUMPoolName(handle);
1517
1518 umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);
1519 umpire_allocator pooled_allocator;
1520
1521 if ( umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name) )
1522 {
1523 umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
1524 }
1525 else
1526 {
1527 umpire_allocator allocator;
1528 umpire_resourcemanager_get_allocator_by_name(rm_ptr, resource_name, &allocator);
1529 umpire_resourcemanager_make_allocator_pool(rm_ptr, pool_name, allocator,
1530 hypre_HandleUmpireUMPoolSize(handle),
1531 hypre_HandleUmpireBlockSize(handle), &pooled_allocator);
1532
1533 hypre_HandleOwnUmpireUMPool(handle) = 1;
1534 }
1535
1536 *ptr = umpire_allocator_allocate(&pooled_allocator, nbytes);
1537
1538 return hypre_error_flag;
1539 }
1540
1541 HYPRE_Int
hypre_umpire_um_pooled_free(void * ptr)1542 hypre_umpire_um_pooled_free(void *ptr)
1543 {
1544 hypre_Handle *handle = hypre_handle();
1545 const char *pool_name = hypre_HandleUmpireUMPoolName(handle);
1546 umpire_allocator pooled_allocator;
1547
1548 umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);
1549
1550 hypre_assert(umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name));
1551
1552 umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
1553 umpire_allocator_deallocate(&pooled_allocator, ptr);
1554
1555 return hypre_error_flag;
1556 }
1557 #endif
1558
1559 #if defined(HYPRE_USING_UMPIRE_PINNED)
1560 HYPRE_Int
hypre_umpire_pinned_pooled_allocate(void ** ptr,size_t nbytes)1561 hypre_umpire_pinned_pooled_allocate(void **ptr, size_t nbytes)
1562 {
1563 hypre_Handle *handle = hypre_handle();
1564 const char *resource_name = "PINNED";
1565 const char *pool_name = hypre_HandleUmpirePinnedPoolName(handle);
1566
1567 umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);
1568 umpire_allocator pooled_allocator;
1569
1570 if ( umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name) )
1571 {
1572 umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
1573 }
1574 else
1575 {
1576 umpire_allocator allocator;
1577 umpire_resourcemanager_get_allocator_by_name(rm_ptr, resource_name, &allocator);
1578 umpire_resourcemanager_make_allocator_pool(rm_ptr, pool_name, allocator,
1579 hypre_HandleUmpirePinnedPoolSize(handle),
1580 hypre_HandleUmpireBlockSize(handle), &pooled_allocator);
1581
1582 hypre_HandleOwnUmpirePinnedPool(handle) = 1;
1583 }
1584
1585 *ptr = umpire_allocator_allocate(&pooled_allocator, nbytes);
1586
1587 return hypre_error_flag;
1588 }
1589
1590 HYPRE_Int
hypre_umpire_pinned_pooled_free(void * ptr)1591 hypre_umpire_pinned_pooled_free(void *ptr)
1592 {
1593 const hypre_Handle *handle = hypre_handle();
1594 const char *pool_name = hypre_HandleUmpirePinnedPoolName(handle);
1595 umpire_allocator pooled_allocator;
1596
1597 umpire_resourcemanager *rm_ptr = &hypre_HandleUmpireResourceMan(handle);
1598
1599 hypre_assert(umpire_resourcemanager_is_allocator_name(rm_ptr, pool_name));
1600
1601 umpire_resourcemanager_get_allocator_by_name(rm_ptr, pool_name, &pooled_allocator);
1602 umpire_allocator_deallocate(&pooled_allocator, ptr);
1603
1604 return hypre_error_flag;
1605 }
1606 #endif
1607
1608