##*****************************************************************************
#  AUTHOR:
#    Michael Hinton <hinton@schedmd.com>
#
#  SYNOPSIS:
#    X_AC_NVML
#
#  DESCRIPTION:
#    Determine if NVIDIA's NVML API library exists (comes with CUDA)
#
#  OPTIONS:
#    --without-nvml   Skip the NVML checks entirely.
#
#  RESULTS:
#    HAVE_NVML        Defined to 1 in the config header when both nvml.h and
#                     libnvidia-ml (symbol nvmlInit) are found.
#    NVML_LIBS        Substituted; set to "-lnvidia-ml" when NVML is found.
#    NVML_CPPFLAGS    Substituted; set to the include search flags used for
#                     the header check when NVML is found.
#    BUILD_NVML       Automake conditional; true only when both the header
#                     and the library were found.
##*****************************************************************************

# TODO: Check for the "CUDA_DEVICE_ORDER=PCI_BUS_ID" environmental var
# If that is not set, emit a warning and point to the documentation
# saying that this needs to be set for CUDA device numbers to match Slurm/NVML
# device numbers, and that after setting, a reboot is required?

# TODO: Check to make sure that nvidia driver is at least r384.40, or else there
# is weirdness with PCI bus id lookups. See https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g9dc7be8cb41b6c77552c0fa0c36557c4

AC_DEFUN([X_AC_NVML],
[
  AC_ARG_WITH(
    [nvml],
    [AS_HELP_STRING([--without-nvml], [Do not build NVIDIA NVML-related code])],
    []
  )

  # Start from a known state so that the conditional at the end never picks
  # up stray values of these variables from the environment, in particular
  # when the checks below are skipped.
  ac_nvml=no
  ac_nvml_h=no

  if test "x$with_nvml" = xno; then
    AC_MSG_WARN([support for nvml disabled])
  else
    # /usr/local/cuda/include is the main location. Others are just in case
    nvml_includes="-I/usr/local/cuda/include -I/usr/cuda/include"

    # Check for NVML header and library in the default locations.
    # CPPFLAGS is extended only for the duration of the checks and is
    # restored afterwards.
    cppflags_save="$CPPFLAGS"
    CPPFLAGS="$nvml_includes $CPPFLAGS"
    AC_CHECK_HEADER([nvml.h], [ac_nvml_h=yes], [ac_nvml_h=no])
    # Explicit found/not-found actions are given so that the library is not
    # appended to LIBS for every target; only NVML_LIBS carries it.
    AC_CHECK_LIB([nvidia-ml], [nvmlInit], [ac_nvml=yes], [ac_nvml=no])
    CPPFLAGS="$cppflags_save"

    if test "$ac_nvml" = "yes" && test "$ac_nvml_h" = "yes"; then
      NVML_LIBS="-lnvidia-ml"
      NVML_CPPFLAGS="$nvml_includes"
      AC_DEFINE([HAVE_NVML], [1], [Define to 1 if NVML library found])
    else
      AC_MSG_WARN([unable to locate libnvidia-ml.so and/or nvml.h])
    fi
  fi

  # Output variable registration is not affected by shell control flow, so
  # it is kept outside the conditional above.
  AC_SUBST([NVML_LIBS])
  AC_SUBST([NVML_CPPFLAGS])

  AM_CONDITIONAL([BUILD_NVML],
                 [test "$ac_nvml" = "yes" && test "$ac_nvml_h" = "yes"])
])