1      subroutine util_setup_gpu_affinity
2#ifdef USE_CUDA_AFFINITY
3      use iso_fortran_env
4      use cudafor
5      use openacc
6#include "errquit.fh"
7#include "global.fh"
8      integer(INT32) :: num_devices, use_ngpus, my_gpu
9      integer :: ppn, node_rank, me
10      character*255 :: char_use_ngpus
11      integer(acc_device_kind) :: devicetype
12      me = ga_nodeid()
13      devicetype = NVIDIA
14      !
15      ! CUDA stuff
16      !
17      ! how many GPUs are detected
18      err = cudaGetDeviceCount(num_devices)
19      if (err.ne.0) call errquit('cudaGetDeviceCount',err,UNKNOWN_ERR)
20      if (num_devices.lt.1) call errquit('No GPU found!',0,UNKNOWN_ERR)
21      ! user prescribes how many to use
22      call util_getenv('NWCHEM_OPENACC_USE_NGPUS',char_use_ngpus)
23      if (me.eq.0) then
24        print*,'CU NWCHEM_OPENACC_USE_NGPUS=',trim(char_use_ngpus)
25      endif
26      if (len(trim(char_use_ngpus)).gt.0) then
27        read(char_use_ngpus,'(i255)') use_ngpus
28        if (use_ngpus.gt.num_devices) then
29          write(6,600) use_ngpus,num_devices
30          use_ngpus = num_devices
31        endif
32  600   format('CU you asked for ',
33     &         i2,' GPUs but only ',
34     &         i2,' are present')
35      else
36        use_ngpus=num_devices
37      endif
38      ! GA PPN undercounts with ARMCI-MPI
39      call util_getppn(ppn) ! THIS IS COLLECTIVE
40      ! this assumes PPN is constant across nodes (reasonable)
41      node_rank = modulo(me,ppn)
42      ! assign GPUs to GA process ranks within a node (round-robin)
43      my_gpu = modulo(node_rank,use_ngpus)
44      if (me.lt.ppn) then
45        write(6,700) 'node_rank',node_rank
46        write(6,700) 'use_ngpus',use_ngpus
47        write(6,700) 'ppn      ',ppn
48        write(6,700) 'ga_nodeid',me
49        write(6,700) 'my_gpu   ',my_gpu
50  700   format('CU ',a12,'=',i5)
51      endif
52      err = cudaSetDevice(my_gpu)
53      if (err.ne.0) call errquit('cudaSetDevice',my_gpu,UNKNOWN_ERR)
54      call acc_set_device_num(my_gpu,devicetype)
55#endif
56      end subroutine util_setup_gpu_affinity
57