1##*****************************************************************************
2#  AUTHOR:
3#    Michael Hinton <hinton@schedmd.com>
4#
5#  SYNOPSIS:
6#    X_AC_NVML
7#
8#  DESCRIPTION:
9#    Determine if NVIDIA's NVML API library exists (comes with CUDA)
10##*****************************************************************************
11
# TODO: Check for the "CUDA_DEVICE_ORDER=PCI_BUS_ID" environment variable.
# If it is not set, emit a warning and point to the documentation explaining
# that it must be set for CUDA device numbers to match Slurm/NVML device
# numbers. (Open question: is a reboot required after setting it?)
16
17# TODO: Check to make sure that nvidia driver is at least r384.40, or else there
18# is weirdness with PCI bus id lookups. See https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g9dc7be8cb41b6c77552c0fa0c36557c4
19
dnl X_AC_NVML
dnl
dnl Probe for NVIDIA's NVML (the nvml.h header plus libnvidia-ml) unless the
dnl user passed --without-nvml to configure.
dnl
dnl Results:
dnl   NVML_LIBS      substituted; set to "-lnvidia-ml" when both checks pass
dnl   NVML_CPPFLAGS  substituted; set to the CUDA include flags when both pass
dnl   HAVE_NVML      defined to 1 in the config header when both checks pass
dnl   BUILD_NVML     automake conditional; true only when both checks pass
dnl
dnl The shell variables ac_nvml and ac_nvml_h hold the library and header
dnl results ("yes" or "no") and are always assigned by this macro.
AC_DEFUN([X_AC_NVML],
[
  AC_ARG_WITH(
    [nvml],
    [AS_HELP_STRING([--without-nvml],
                    [Do not build NVIDIA NVML-related code])],
    []
  )

  # Start from a known state so that values inherited from the environment
  # cannot switch the build conditional on when the probe below is skipped.
  ac_nvml=no
  ac_nvml_h=no

  if test "x$with_nvml" = xno; then
     AC_MSG_WARN([support for nvml disabled])
  else
    # /usr/local/cuda/include is the main location. Others are just in case
    nvml_includes="-I/usr/local/cuda/include -I/usr/cuda/include"

    # Check for NVML header and library in the default locations. The extra
    # include directories are only in effect for the duration of the probe.
    cppflags_save="$CPPFLAGS"
    CPPFLAGS="$nvml_includes $CPPFLAGS"
    AC_CHECK_HEADER([nvml.h], [ac_nvml_h=yes], [ac_nvml_h=no])
    AC_CHECK_LIB([nvidia-ml], [nvmlInit], [ac_nvml=yes], [ac_nvml=no])
    CPPFLAGS="$cppflags_save"

    # Both the header and the library are needed for NVML to be usable.
    if test "$ac_nvml" = "yes" && test "$ac_nvml_h" = "yes"; then
      NVML_LIBS="-lnvidia-ml"
      NVML_CPPFLAGS="$nvml_includes"
      AC_DEFINE([HAVE_NVML], [1], [Define to 1 if NVML library found])
    else
      AC_MSG_WARN([unable to locate libnvidia-ml.so and/or nvml.h])
    fi
  fi

  dnl Substitutions and the conditional are registered regardless of which
  dnl shell branch runs, so they are declared once here at the end.
  AC_SUBST([NVML_LIBS])
  AC_SUBST([NVML_CPPFLAGS])
  AM_CONDITIONAL([BUILD_NVML],
                 [test "$ac_nvml" = "yes" && test "$ac_nvml_h" = "yes"])
])
52