1# - Tools for building CUDA C files: libraries and build dependencies. 2# This script locates the NVIDIA CUDA C tools. It should work on linux, windows, 3# and mac and should be reasonably up to date with CUDA C releases. 4# 5# This script makes use of the standard find_package arguments of <VERSION>, 6# REQUIRED and QUIET. CUDA_FOUND will report if an acceptable version of CUDA 7# was found. 8# 9# The script will prompt the user to specify CUDA_TOOLKIT_ROOT_DIR if the prefix 10# cannot be determined by the location of nvcc in the system path and REQUIRED 11# is specified to find_package(). To use a different installed version of the 12# toolkit set the environment variable CUDA_BIN_PATH before running cmake 13# (e.g. CUDA_BIN_PATH=/usr/local/cuda1.0 instead of the default /usr/local/cuda) 14# or set CUDA_TOOLKIT_ROOT_DIR after configuring. If you change the value of 15# CUDA_TOOLKIT_ROOT_DIR, various components that depend on the path will be 16# relocated. 17# 18# It might be necessary to set CUDA_TOOLKIT_ROOT_DIR manually on certain 19# platforms, or to use a cuda runtime not installed in the default location. In 20# newer versions of the toolkit the cuda library is included with the graphics 21# driver- be sure that the driver version matches what is needed by the cuda 22# runtime version. 23# 24# The following variables affect the behavior of the macros in the script (in 25# alphebetical order). Note that any of these flags can be changed multiple 26# times in the same directory before calling CUDA_ADD_EXECUTABLE, 27# CUDA_ADD_LIBRARY, CUDA_COMPILE, CUDA_COMPILE_PTX or CUDA_WRAP_SRCS. 28# 29# CUDA_64_BIT_DEVICE_CODE (Default matches host bit size) 30# -- Set to ON to compile for 64 bit device code, OFF for 32 bit device code. 31# Note that making this different from the host code when generating object 32# or C files from CUDA code just won't work, because size_t gets defined by 33# nvcc in the generated source. If you compile to PTX and then load the 34# file yourself, you can mix bit sizes between device and host. 35# 36# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE (Default ON) 37# -- Set to ON if you want the custom build rule to be attached to the source 38# file in Visual Studio. Turn OFF if you add the same cuda file to multiple 39# targets. 40# 41# This allows the user to build the target from the CUDA file; however, bad 42# things can happen if the CUDA source file is added to multiple targets. 43# When performing parallel builds it is possible for the custom build 44# command to be run more than once and in parallel causing cryptic build 45# errors. VS runs the rules for every source file in the target, and a 46# source can have only one rule no matter how many projects it is added to. 47# When the rule is run from multiple targets race conditions can occur on 48# the generated file. Eventually everything will get built, but if the user 49# is unaware of this behavior, there may be confusion. It would be nice if 50# this script could detect the reuse of source files across multiple targets 51# and turn the option off for the user, but no good solution could be found. 52# 53# CUDA_BUILD_CUBIN (Default OFF) 54# -- Set to ON to enable and extra compilation pass with the -cubin option in 55# Device mode. The output is parsed and register, shared memory usage is 56# printed during build. 57# 58# CUDA_BUILD_EMULATION (Default OFF for device mode) 59# -- Set to ON for Emulation mode. -D_DEVICEEMU is defined for CUDA C files 60# when CUDA_BUILD_EMULATION is TRUE. 61# 62# CUDA_GENERATED_OUTPUT_DIR (Default CMAKE_CURRENT_BINARY_DIR) 63# -- Set to the path you wish to have the generated files placed. If it is 64# blank output files will be placed in CMAKE_CURRENT_BINARY_DIR. 65# Intermediate files will always be placed in 66# CMAKE_CURRENT_BINARY_DIR/CMakeFiles. 67# 68# CUDA_HOST_COMPILATION_CPP (Default ON) 69# -- Set to OFF for C compilation of host code. 70# 71# CUDA_NVCC_FLAGS 72# CUDA_NVCC_FLAGS_<CONFIG> 73# -- Additional NVCC command line arguments. NOTE: multiple arguments must be 74# semi-colon delimited (e.g. --compiler-options;-Wall) 75# 76# CUDA_PROPAGATE_HOST_FLAGS (Default ON) 77# -- Set to ON to propagate CMAKE_{C,CXX}_FLAGS and their configuration 78# dependent counterparts (e.g. CMAKE_C_FLAGS_DEBUG) automatically to the 79# host compiler through nvcc's -Xcompiler flag. This helps make the 80# generated host code match the rest of the system better. Sometimes 81# certain flags give nvcc problems, and this will help you turn the flag 82# propagation off. This does not affect the flags supplied directly to nvcc 83# via CUDA_NVCC_FLAGS or through the OPTION flags specified through 84# CUDA_ADD_LIBRARY, CUDA_ADD_EXECUTABLE, or CUDA_WRAP_SRCS. Flags used for 85# shared library compilation are not affected by this flag. 86# 87# CUDA_VERBOSE_BUILD (Default OFF) 88# -- Set to ON to see all the commands used when building the CUDA file. When 89# using a Makefile generator the value defaults to VERBOSE (run make 90# VERBOSE=1 to see output), although setting CUDA_VERBOSE_BUILD to ON will 91# always print the output. 92# 93# The script creates the following macros (in alphebetical order): 94# 95# CUDA_ADD_CUFFT_TO_TARGET( cuda_target ) 96# -- Adds the cufft library to the target (can be any target). Handles whether 97# you are in emulation mode or not. 98# 99# CUDA_ADD_CUBLAS_TO_TARGET( cuda_target ) 100# -- Adds the cublas library to the target (can be any target). Handles 101# whether you are in emulation mode or not. 102# 103# CUDA_ADD_EXECUTABLE( cuda_target file0 file1 ... 104# [WIN32] [MACOSX_BUNDLE] [EXCLUDE_FROM_ALL] [OPTIONS ...] ) 105# -- Creates an executable "cuda_target" which is made up of the files 106# specified. All of the non CUDA C files are compiled using the standard 107# build rules specified by CMAKE and the cuda files are compiled to object 108# files using nvcc and the host compiler. In addition CUDA_INCLUDE_DIRS is 109# added automatically to include_directories(). Some standard CMake target 110# calls can be used on the target after calling this macro 111# (e.g. set_target_properties and target_link_libraries), but setting 112# properties that adjust compilation flags will not affect code compiled by 113# nvcc. Such flags should be modified before calling CUDA_ADD_EXECUTABLE, 114# CUDA_ADD_LIBRARY or CUDA_WRAP_SRCS. 115# 116# CUDA_ADD_LIBRARY( cuda_target file0 file1 ... 117# [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...] ) 118# -- Same as CUDA_ADD_EXECUTABLE except that a library is created. 119# 120# CUDA_BUILD_CLEAN_TARGET() 121# -- Creates a convience target that deletes all the dependency files 122# generated. You should make clean after running this target to ensure the 123# dependency files get regenerated. 124# 125# CUDA_COMPILE( generated_files file0 file1 ... [STATIC | SHARED | MODULE] 126# [OPTIONS ...] ) 127# -- Returns a list of generated files from the input source files to be used 128# with ADD_LIBRARY or ADD_EXECUTABLE. 129# 130# CUDA_COMPILE_PTX( generated_files file0 file1 ... [OPTIONS ...] ) 131# -- Returns a list of PTX files generated from the input source files. 132# 133# CUDA_INCLUDE_DIRECTORIES( path0 path1 ... ) 134# -- Sets the directories that should be passed to nvcc 135# (e.g. nvcc -Ipath0 -Ipath1 ... ). These paths usually contain other .cu 136# files. 137# 138# CUDA_WRAP_SRCS ( cuda_target format generated_files file0 file1 ... 139# [STATIC | SHARED | MODULE] [OPTIONS ...] ) 140# -- This is where all the magic happens. CUDA_ADD_EXECUTABLE, 141# CUDA_ADD_LIBRARY, CUDA_COMPILE, and CUDA_COMPILE_PTX all call this 142# function under the hood. 143# 144# Given the list of files (file0 file1 ... fileN) this macro generates 145# custom commands that generate either PTX or linkable objects (use "PTX" or 146# "OBJ" for the format argument to switch). Files that don't end with .cu 147# or have the HEADER_FILE_ONLY property are ignored. 148# 149# The arguments passed in after OPTIONS are extra command line options to 150# give to nvcc. You can also specify per configuration options by 151# specifying the name of the configuration followed by the options. General 152# options must preceed configuration specific options. Not all 153# configurations need to be specified, only the ones provided will be used. 154# 155# OPTIONS -DFLAG=2 "-DFLAG_OTHER=space in flag" 156# DEBUG -g 157# RELEASE --use_fast_math 158# RELWITHDEBINFO --use_fast_math;-g 159# MINSIZEREL --use_fast_math 160# 161# For certain configurations (namely VS generating object files with 162# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE set to ON), no generated file will 163# be produced for the given cuda file. This is because when you add the 164# cuda file to Visual Studio it knows that this file produces an object file 165# and will link in the resulting object file automatically. 166# 167# This script will also generate a separate cmake script that is used at 168# build time to invoke nvcc. This is for several reasons. 169# 170# 1. nvcc can return negative numbers as return values which confuses 171# Visual Studio into thinking that the command succeeded. The script now 172# checks the error codes and produces errors when there was a problem. 173# 174# 2. nvcc has been known to not delete incomplete results when it 175# encounters problems. This confuses build systems into thinking the 176# target was generated when in fact an unusable file exists. The script 177# now deletes the output files if there was an error. 178# 179# 3. By putting all the options that affect the build into a file and then 180# make the build rule dependent on the file, the output files will be 181# regenerated when the options change. 182# 183# This script also looks at optional arguments STATIC, SHARED, or MODULE to 184# determine when to target the object compilation for a shared library. 185# BUILD_SHARED_LIBS is ignored in CUDA_WRAP_SRCS, but it is respected in 186# CUDA_ADD_LIBRARY. On some systems special flags are added for building 187# objects intended for shared libraries. A preprocessor macro, 188# <target_name>_EXPORTS is defined when a shared library compilation is 189# detected. 190# 191# Flags passed into add_definitions with -D or /D are passed along to nvcc. 192# 193# The script defines the following variables: 194# 195# CUDA_VERSION_MAJOR -- The major version of cuda as reported by nvcc. 196# CUDA_VERSION_MINOR -- The minor version. 197# CUDA_VERSION 198# CUDA_VERSION_STRING -- CUDA_VERSION_MAJOR.CUDA_VERSION_MINOR 199# 200# CUDA_TOOLKIT_ROOT_DIR -- Path to the CUDA Toolkit (defined if not set). 201# CUDA_SDK_ROOT_DIR -- Path to the CUDA SDK. Use this to find files in the 202# SDK. This script will not directly support finding 203# specific libraries or headers, as that isn't 204# supported by NVIDIA. If you want to change 205# libraries when the path changes see the 206# FindCUDA.cmake script for an example of how to clear 207# these variables. There are also examples of how to 208# use the CUDA_SDK_ROOT_DIR to locate headers or 209# libraries, if you so choose (at your own risk). 210# CUDA_INCLUDE_DIRS -- Include directory for cuda headers. Added automatically 211# for CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY. 212# CUDA_LIBRARIES -- Cuda RT library. 213# CUDA_CUFFT_LIBRARIES -- Device or emulation library for the Cuda FFT 214# implementation (alternative to: 215# CUDA_ADD_CUFFT_TO_TARGET macro) 216# CUDA_CUBLAS_LIBRARIES -- Device or emulation library for the Cuda BLAS 217# implementation (alterative to: 218# CUDA_ADD_CUBLAS_TO_TARGET macro). 219# CUDA_curand_LIBRARY -- CUDA Random Number Generation library. 220# Only available for CUDA version 3.2+. 221# CUDA_cusparse_LIBRARY -- CUDA Sparse Matrix library. 222# Only available for CUDA version 3.2+. 223# CUDA_npp_LIBRARY -- NVIDIA Performance Primitives library. 224# Only available for CUDA version 4.0+. 225# CUDA_nvcuvenc_LIBRARY -- CUDA Video Encoder library. 226# Only available for CUDA version 3.2+. 227# Windows only. 228# CUDA_nvcuvid_LIBRARY -- CUDA Video Decoder library. 229# Only available for CUDA version 3.2+. 230# Windows only. 231# 232# 233# James Bigler, NVIDIA Corp (nvidia.com - jbigler) 234# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html 235# 236# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. 237# 238# Copyright (c) 2007-2009 239# Scientific Computing and Imaging Institute, University of Utah 240# 241# This code is licensed under the MIT License. See the FindCUDA.cmake script 242# for the text of the license. 243 244# The MIT License 245# 246# License for the specific language governing rights and limitations under 247# Permission is hereby granted, free of charge, to any person obtaining a 248# copy of this software and associated documentation files (the "Software"), 249# to deal in the Software without restriction, including without limitation 250# the rights to use, copy, modify, merge, publish, distribute, sublicense, 251# and/or sell copies of the Software, and to permit persons to whom the 252# Software is furnished to do so, subject to the following conditions: 253# 254# The above copyright notice and this permission notice shall be included 255# in all copies or substantial portions of the Software. 256# 257# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 258# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 259# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 260# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 261# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 262# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 263# DEALINGS IN THE SOFTWARE. 264# 265############################################################################### 266 267# FindCUDA.cmake 268 269# We need to have at least this version to support the VERSION_LESS argument to 'if' (2.6.2) and unset (2.6.3) 270cmake_policy(PUSH) 271cmake_minimum_required(VERSION 2.6.3) 272cmake_policy(POP) 273 274if (POLICY CMP0054) 275 #cmake_policy(PUSH) 276 cmake_policy(SET CMP0054 NEW) 277 #cmake_policy(POP) 278endif() 279 280# This macro helps us find the location of helper files we will need the full path to 281macro(CUDA_FIND_HELPER_FILE _name _extension) 282 set(_full_name "${_name}.${_extension}") 283 # CMAKE_CURRENT_LIST_FILE contains the full path to the file currently being 284 # processed. Using this variable, we can pull out the current path, and 285 # provide a way to get access to the other files we need local to here. 286 get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) 287 set(CUDA_${_name} "${CMAKE_CURRENT_LIST_DIR}/FindCUDA/${_full_name}") 288 if(NOT EXISTS "${CUDA_${_name}}") 289 set(error_message "${_full_name} not found in ${CMAKE_CURRENT_LIST_DIR}/FindCUDA") 290 if(CUDA_FIND_REQUIRED) 291 message(FATAL_ERROR "${error_message}") 292 else() 293 if(NOT CUDA_FIND_QUIETLY) 294 message(STATUS "${error_message}") 295 endif() 296 endif() 297 endif() 298 # Set this variable as internal, so the user isn't bugged with it. 299 set(CUDA_${_name} ${CUDA_${_name}} CACHE INTERNAL "Location of ${_full_name}" FORCE) 300endmacro(CUDA_FIND_HELPER_FILE) 301 302##################################################################### 303## CUDA_INCLUDE_NVCC_DEPENDENCIES 304## 305 306# So we want to try and include the dependency file if it exists. If 307# it doesn't exist then we need to create an empty one, so we can 308# include it. 309 310# If it does exist, then we need to check to see if all the files it 311# depends on exist. If they don't then we should clear the dependency 312# file and regenerate it later. This covers the case where a header 313# file has disappeared or moved. 314 315macro(CUDA_INCLUDE_NVCC_DEPENDENCIES dependency_file) 316 set(CUDA_NVCC_DEPEND) 317 set(CUDA_NVCC_DEPEND_REGENERATE FALSE) 318 319 320 # Include the dependency file. Create it first if it doesn't exist . The 321 # INCLUDE puts a dependency that will force CMake to rerun and bring in the 322 # new info when it changes. DO NOT REMOVE THIS (as I did and spent a few 323 # hours figuring out why it didn't work. 324 if(NOT EXISTS ${dependency_file}) 325 file(WRITE ${dependency_file} "#FindCUDA.cmake generated file. Do not edit.\n") 326 endif() 327 # Always include this file to force CMake to run again next 328 # invocation and rebuild the dependencies. 329 #message("including dependency_file = ${dependency_file}") 330 include(${dependency_file}) 331 332 # Now we need to verify the existence of all the included files 333 # here. If they aren't there we need to just blank this variable and 334 # make the file regenerate again. 335# if(DEFINED CUDA_NVCC_DEPEND) 336# message("CUDA_NVCC_DEPEND set") 337# else() 338# message("CUDA_NVCC_DEPEND NOT set") 339# endif() 340 if(CUDA_NVCC_DEPEND) 341 #message("CUDA_NVCC_DEPEND found") 342 foreach(f ${CUDA_NVCC_DEPEND}) 343 # message("searching for ${f}") 344 if(NOT EXISTS ${f}) 345 #message("file ${f} not found") 346 set(CUDA_NVCC_DEPEND_REGENERATE TRUE) 347 endif() 348 endforeach(f) 349 else(CUDA_NVCC_DEPEND) 350 #message("CUDA_NVCC_DEPEND false") 351 # No dependencies, so regenerate the file. 352 set(CUDA_NVCC_DEPEND_REGENERATE TRUE) 353 endif(CUDA_NVCC_DEPEND) 354 355 #message("CUDA_NVCC_DEPEND_REGENERATE = ${CUDA_NVCC_DEPEND_REGENERATE}") 356 # No incoming dependencies, so we need to generate them. Make the 357 # output depend on the dependency file itself, which should cause the 358 # rule to re-run. 359 if(CUDA_NVCC_DEPEND_REGENERATE) 360 set(CUDA_NVCC_DEPEND ${dependency_file}) 361 #message("Generating an empty dependency_file: ${dependency_file}") 362 file(WRITE ${dependency_file} "#FindCUDA.cmake generated file. Do not edit.\n") 363 endif(CUDA_NVCC_DEPEND_REGENERATE) 364 365endmacro(CUDA_INCLUDE_NVCC_DEPENDENCIES) 366 367############################################################################### 368############################################################################### 369# Setup variables' defaults 370############################################################################### 371############################################################################### 372 373# Allow the user to specify if the device code is supposed to be 32 or 64 bit. 374if(CMAKE_SIZEOF_VOID_P EQUAL 8) 375 set(CUDA_64_BIT_DEVICE_CODE_DEFAULT ON) 376else() 377 set(CUDA_64_BIT_DEVICE_CODE_DEFAULT OFF) 378endif() 379option(CUDA_64_BIT_DEVICE_CODE "Compile device code in 64 bit mode" ${CUDA_64_BIT_DEVICE_CODE_DEFAULT}) 380 381# Attach the build rule to the source file in VS. This option 382option(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE "Attach the build rule to the CUDA source file. Enable only when the CUDA source file is added to at most one target." ON) 383 384# Prints out extra information about the cuda file during compilation 385option(CUDA_BUILD_CUBIN "Generate and parse .cubin files in Device mode." OFF) 386 387# Set whether we are using emulation or device mode. 388option(CUDA_BUILD_EMULATION "Build in Emulation mode" OFF) 389 390# Where to put the generated output. 391set(CUDA_GENERATED_OUTPUT_DIR "" CACHE PATH "Directory to put all the output files. If blank it will default to the CMAKE_CURRENT_BINARY_DIR") 392 393# Parse HOST_COMPILATION mode. 394option(CUDA_HOST_COMPILATION_CPP "Generated file extension" ON) 395 396# Extra user settable flags 397set(CUDA_NVCC_FLAGS "" CACHE STRING "Semi-colon delimit multiple arguments.") 398 399# Propagate the host flags to the host compiler via -Xcompiler 400option(CUDA_PROPAGATE_HOST_FLAGS "Propage C/CXX_FLAGS and friends to the host compiler via -Xcompile" ON) 401 402# Specifies whether the commands used when compiling the .cu file will be printed out. 403option(CUDA_VERBOSE_BUILD "Print out the commands run while compiling the CUDA source file. With the Makefile generator this defaults to VERBOSE variable specified on the command line, but can be forced on with this option." OFF) 404 405mark_as_advanced( 406 CUDA_64_BIT_DEVICE_CODE 407 CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE 408 CUDA_GENERATED_OUTPUT_DIR 409 CUDA_HOST_COMPILATION_CPP 410 CUDA_NVCC_FLAGS 411 CUDA_PROPAGATE_HOST_FLAGS 412 ) 413 414# Makefile and similar generators don't define CMAKE_CONFIGURATION_TYPES, so we 415# need to add another entry for the CMAKE_BUILD_TYPE. We also need to add the 416# standerd set of 4 build types (Debug, MinSizeRel, Release, and RelWithDebInfo) 417# for completeness. We need run this loop in order to accomodate the addition 418# of extra configuration types. Duplicate entries will be removed by 419# REMOVE_DUPLICATES. 420set(CUDA_configuration_types ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE} Debug MinSizeRel Release RelWithDebInfo) 421list(REMOVE_DUPLICATES CUDA_configuration_types) 422foreach(config ${CUDA_configuration_types}) 423 string(TOUPPER ${config} config_upper) 424 set(CUDA_NVCC_FLAGS_${config_upper} "" CACHE STRING "Semi-colon delimit multiple arguments.") 425 mark_as_advanced(CUDA_NVCC_FLAGS_${config_upper}) 426endforeach() 427 428############################################################################### 429############################################################################### 430# Locate CUDA, Set Build Type, etc. 431############################################################################### 432############################################################################### 433 434# Check to see if the CUDA_TOOLKIT_ROOT_DIR and CUDA_SDK_ROOT_DIR have changed, 435# if they have then clear the cache variables, so that will be detected again. 436if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}") 437 unset(CUDA_NVCC_EXECUTABLE CACHE) 438 unset(CUDA_TOOLKIT_INCLUDE CACHE) 439 unset(CUDA_CUDART_LIBRARY CACHE) 440 # Make sure you run this before you unset CUDA_VERSION. 441 if(CUDA_VERSION VERSION_EQUAL "3.0") 442 # This only existed in the 3.0 version of the CUDA toolkit 443 unset(CUDA_CUDARTEMU_LIBRARY CACHE) 444 endif() 445 unset(CUDA_VERSION CACHE) 446 unset(CUDA_CUDA_LIBRARY CACHE) 447 unset(CUDA_cublas_LIBRARY CACHE) 448 unset(CUDA_cublasemu_LIBRARY CACHE) 449 unset(CUDA_cufft_LIBRARY CACHE) 450 unset(CUDA_cufftemu_LIBRARY CACHE) 451 unset(CUDA_curand_LIBRARY CACHE) 452 unset(CUDA_cusparse_LIBRARY CACHE) 453 unset(CUDA_npp_LIBRARY CACHE) 454 unset(CUDA_nvcuvenc_LIBRARY CACHE) 455 unset(CUDA_nvcuvid_LIBRARY CACHE) 456endif() 457 458if(NOT "${CUDA_SDK_ROOT_DIR}" STREQUAL "${CUDA_SDK_ROOT_DIR_INTERNAL}") 459 # No specific variables to catch. Use this kind of code before calling 460 # find_package(CUDA) to clean up any variables that may depend on this path. 461 462 # unset(MY_SPECIAL_CUDA_SDK_INCLUDE_DIR CACHE) 463 # unset(MY_SPECIAL_CUDA_SDK_LIBRARY CACHE) 464endif() 465 466# Search for the cuda distribution. 467if(NOT CUDA_TOOLKIT_ROOT_DIR) 468 469 # Search in the CUDA_BIN_PATH first. 470 find_path(CUDA_TOOLKIT_ROOT_DIR 471 NAMES nvcc nvcc.exe 472 PATHS 473 ENV CUDA_PATH 474 ENV CUDA_BIN_PATH 475 PATH_SUFFIXES bin bin64 476 DOC "Toolkit location." 477 NO_DEFAULT_PATH 478 ) 479 # Now search default paths 480 find_path(CUDA_TOOLKIT_ROOT_DIR 481 NAMES nvcc nvcc.exe 482 PATHS /usr/local/bin 483 /usr/local/cuda/bin 484 DOC "Toolkit location." 485 ) 486 487 if (CUDA_TOOLKIT_ROOT_DIR) 488 string(REGEX REPLACE "[/\\\\]?bin[64]*[/\\\\]?$" "" CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR}) 489 # We need to force this back into the cache. 490 set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR} CACHE PATH "Toolkit location." FORCE) 491 endif(CUDA_TOOLKIT_ROOT_DIR) 492 if (NOT EXISTS ${CUDA_TOOLKIT_ROOT_DIR}) 493 if(CUDA_FIND_REQUIRED) 494 message(FATAL_ERROR "Specify CUDA_TOOLKIT_ROOT_DIR") 495 elseif(NOT CUDA_FIND_QUIETLY) 496 message("CUDA_TOOLKIT_ROOT_DIR not found or specified") 497 endif() 498 endif (NOT EXISTS ${CUDA_TOOLKIT_ROOT_DIR}) 499endif (NOT CUDA_TOOLKIT_ROOT_DIR) 500 501# CUDA_NVCC_EXECUTABLE 502find_program(CUDA_NVCC_EXECUTABLE 503 NAMES nvcc 504 PATHS "${CUDA_TOOLKIT_ROOT_DIR}" 505 ENV CUDA_PATH 506 ENV CUDA_BIN_PATH 507 PATH_SUFFIXES bin bin64 508 NO_DEFAULT_PATH 509 ) 510# Search default search paths, after we search our own set of paths. 511find_program(CUDA_NVCC_EXECUTABLE nvcc) 512mark_as_advanced(CUDA_NVCC_EXECUTABLE) 513 514if(CUDA_NVCC_EXECUTABLE AND NOT CUDA_VERSION) 515 # Compute the version. 516 execute_process (COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) 517 string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${NVCC_OUT}) 518 string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${NVCC_OUT}) 519 set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.") 520 mark_as_advanced(CUDA_VERSION) 521else() 522 # Need to set these based off of the cached value 523 string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${CUDA_VERSION}") 524 string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${CUDA_VERSION}") 525endif() 526 527# Always set this convenience variable 528set(CUDA_VERSION_STRING "${CUDA_VERSION}") 529 530# Here we need to determine if the version we found is acceptable. We will 531# assume that is unless CUDA_FIND_VERSION_EXACT or CUDA_FIND_VERSION is 532# specified. The presence of either of these options checks the version 533# string and signals if the version is acceptable or not. 534set(_cuda_version_acceptable TRUE) 535# 536if(CUDA_FIND_VERSION_EXACT AND NOT CUDA_VERSION VERSION_EQUAL CUDA_FIND_VERSION) 537 set(_cuda_version_acceptable FALSE) 538endif() 539# 540if(CUDA_FIND_VERSION AND CUDA_VERSION VERSION_LESS CUDA_FIND_VERSION) 541 set(_cuda_version_acceptable FALSE) 542endif() 543# 544if(NOT _cuda_version_acceptable) 545 set(_cuda_error_message "Requested CUDA version ${CUDA_FIND_VERSION}, but found unacceptable version ${CUDA_VERSION}") 546 if(CUDA_FIND_REQUIRED) 547 message("${_cuda_error_message}") 548 elseif(NOT CUDA_FIND_QUIETLY) 549 message("${_cuda_error_message}") 550 endif() 551endif() 552 553# CUDA_TOOLKIT_INCLUDE 554find_path(CUDA_TOOLKIT_INCLUDE 555 device_functions.h # Header included in toolkit 556 PATHS "${CUDA_TOOLKIT_ROOT_DIR}" 557 ENV CUDA_PATH 558 ENV CUDA_INC_PATH 559 PATH_SUFFIXES include 560 NO_DEFAULT_PATH 561 ) 562# Search default search paths, after we search our own set of paths. 563find_path(CUDA_TOOLKIT_INCLUDE device_functions.h) 564mark_as_advanced(CUDA_TOOLKIT_INCLUDE) 565 566# Set the user list of include dir to nothing to initialize it. 567set (CUDA_NVCC_INCLUDE_ARGS_USER "") 568set (CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE}) 569 570macro(FIND_LIBRARY_LOCAL_FIRST _var _names _doc) 571 if(CMAKE_SIZEOF_VOID_P EQUAL 8) 572 # CUDA 3.2+ on Windows moved the library directoryies, so we need the new 573 # and old paths. 574 set(_cuda_64bit_lib_dir "lib/x64" "lib64") 575 endif() 576 # CUDA 3.2+ on Windows moved the library directories, so we need to new 577 # (lib/Win32) and the old path (lib). 578 find_library(${_var} 579 NAMES ${_names} 580 PATHS "${CUDA_TOOLKIT_ROOT_DIR}" 581 ENV CUDA_PATH 582 ENV CUDA_LIB_PATH 583 PATH_SUFFIXES ${_cuda_64bit_lib_dir} "lib/Win32" "lib" 584 DOC ${_doc} 585 NO_DEFAULT_PATH 586 ) 587 # Search default search paths, after we search our own set of paths. 588 find_library(${_var} NAMES ${_names} 589 PATH_SUFFIXES 590 nvidia-current 591 nvidia-current-updates 592 DOC ${_doc}) 593endmacro() 594 595# CUDA_LIBRARIES 596find_library_local_first(CUDA_CUDART_LIBRARY cudart "\"cudart\" library") 597if(CUDA_VERSION VERSION_EQUAL "3.0") 598 # The cudartemu library only existed for the 3.0 version of CUDA. 599 find_library_local_first(CUDA_CUDARTEMU_LIBRARY cudartemu "\"cudartemu\" library") 600 mark_as_advanced( 601 CUDA_CUDARTEMU_LIBRARY 602 ) 603endif() 604# If we are using emulation mode and we found the cudartemu library then use 605# that one instead of cudart. 606if(CUDA_BUILD_EMULATION AND CUDA_CUDARTEMU_LIBRARY) 607 set(CUDA_LIBRARIES ${CUDA_CUDARTEMU_LIBRARY}) 608else() 609 set(CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY}) 610endif() 611if(APPLE) 612 # We need to add the path to cudart to the linker using rpath, since the 613 # library name for the cuda libraries is prepended with @rpath. 614 if(CUDA_BUILD_EMULATION AND CUDA_CUDARTEMU_LIBRARY) 615 get_filename_component(_cuda_path_to_cudart "${CUDA_CUDARTEMU_LIBRARY}" PATH) 616 else() 617 get_filename_component(_cuda_path_to_cudart "${CUDA_CUDART_LIBRARY}" PATH) 618 endif() 619 # 620 # EDIT 16/11/13 Removed in an attempt to fix install since XCode 5.0.x 621 #if(_cuda_path_to_cudart) 622 # list(APPEND CUDA_LIBRARIES -Wl,-rpath "-Wl,${_cuda_path_to_cudart}") 623 #endif() 624 # 625endif() 626 627# 1.1 toolkit on linux doesn't appear to have a separate library on 628# some platforms. 629find_library_local_first(CUDA_CUDA_LIBRARY cuda "\"cuda\" library (older versions only).") 630 631## 632## NOTE: OSKAR change: we dont think we need libcuda.so in the link list. 633## if this is no longer the case remove the comments from the section below 634## to add libcuda.so back into CUDA_LIBRARIES. 635## 636# Add cuda library to the link line only if it is found. 637#if (CUDA_CUDA_LIBRARY) 638# set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY}) 639#endif(CUDA_CUDA_LIBRARY) 640 641mark_as_advanced( 642 CUDA_CUDA_LIBRARY 643 CUDA_CUDART_LIBRARY) 644 645####################### 646# Look for some of the toolkit helper libraries 647macro(FIND_CUDA_HELPER_LIBS _name) 648 find_library_local_first(CUDA_${_name}_LIBRARY ${_name} "\"${_name}\" library") 649 mark_as_advanced(CUDA_${_name}_LIBRARY) 650endmacro(FIND_CUDA_HELPER_LIBS) 651 652####################### 653# Disable emulation for v3.1 onward 654if(CUDA_VERSION VERSION_GREATER "3.0") 655 if(CUDA_BUILD_EMULATION) 656 message(FATAL_ERROR "CUDA_BUILD_EMULATION is not supported in version 3.1 and onwards. You must disable it to proceed. You have version ${CUDA_VERSION}.") 657 endif() 658endif() 659 660# Search for additional CUDA toolkit libraries. 661if(CUDA_VERSION VERSION_LESS "3.1") 662 # Emulation libraries aren't available in version 3.1 onward. 663 find_cuda_helper_libs(cufftemu) 664 find_cuda_helper_libs(cublasemu) 665endif() 666find_cuda_helper_libs(cufft) 667find_cuda_helper_libs(cublas) 668if(NOT CUDA_VERSION VERSION_LESS "3.2") 669 # cusparse showed up in version 3.2 670 find_cuda_helper_libs(cusparse) 671 find_cuda_helper_libs(curand) 672 if (WIN32) 673 find_cuda_helper_libs(nvcuvenc) 674 find_cuda_helper_libs(nvcuvid) 675 endif() 676endif() 677if(NOT CUDA_VERSION VERSION_LESS "4.0") 678 find_cuda_helper_libs(npp) 679endif() 680 681if (CUDA_BUILD_EMULATION) 682 set(CUDA_CUFFT_LIBRARIES ${CUDA_cufftemu_LIBRARY}) 683 set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublasemu_LIBRARY}) 684else() 685 set(CUDA_CUFFT_LIBRARIES ${CUDA_cufft_LIBRARY}) 686 set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_LIBRARY}) 687endif() 688 689######################## 690# Look for the SDK stuff. As of CUDA 3.0 NVSDKCUDA_ROOT has been replaced with 691# NVSDKCOMPUTE_ROOT with the old CUDA C contents moved into the C subdirectory 692find_path(CUDA_SDK_ROOT_DIR common/inc/cutil.h 693 "$ENV{NVSDKCOMPUTE_ROOT}/C" 694 "$ENV{NVSDKCUDA_ROOT}" 695 "[HKEY_LOCAL_MACHINE\\SOFTWARE\\NVIDIA Corporation\\Installed Products\\NVIDIA SDK 10\\Compute;InstallDir]" 696 "/Developer/GPU\ Computing/C" 697 ) 698 699# Keep the CUDA_SDK_ROOT_DIR first in order to be able to override the 700# environment variables. 701set(CUDA_SDK_SEARCH_PATH 702 "${CUDA_SDK_ROOT_DIR}" 703 "${CUDA_TOOLKIT_ROOT_DIR}/local/NVSDK0.2" 704 "${CUDA_TOOLKIT_ROOT_DIR}/NVSDK0.2" 705 "${CUDA_TOOLKIT_ROOT_DIR}/NV_CUDA_SDK" 706 "$ENV{HOME}/NVIDIA_CUDA_SDK" 707 "$ENV{HOME}/NVIDIA_CUDA_SDK_MACOSX" 708 "/Developer/CUDA" 709 ) 710 711# Example of how to find an include file from the CUDA_SDK_ROOT_DIR 712 713# find_path(CUDA_CUT_INCLUDE_DIR 714# cutil.h 715# PATHS ${CUDA_SDK_SEARCH_PATH} 716# PATH_SUFFIXES "common/inc" 717# DOC "Location of cutil.h" 718# NO_DEFAULT_PATH 719# ) 720# # Now search system paths 721# find_path(CUDA_CUT_INCLUDE_DIR cutil.h DOC "Location of cutil.h") 722 723# mark_as_advanced(CUDA_CUT_INCLUDE_DIR) 724 725 726# Example of how to find a library in the CUDA_SDK_ROOT_DIR 727 728# # cutil library is called cutil64 for 64 bit builds on windows. We don't want 729# # to get these confused, so we are setting the name based on the word size of 730# # the build. 731 732# if(CMAKE_SIZEOF_VOID_P EQUAL 8) 733# set(cuda_cutil_name cutil64) 734# else(CMAKE_SIZEOF_VOID_P EQUAL 8) 735# set(cuda_cutil_name cutil32) 736# endif(CMAKE_SIZEOF_VOID_P EQUAL 8) 737 738# find_library(CUDA_CUT_LIBRARY 739# NAMES cutil ${cuda_cutil_name} 740# PATHS ${CUDA_SDK_SEARCH_PATH} 741# # The new version of the sdk shows up in common/lib, but the old one is in lib 742# PATH_SUFFIXES "common/lib" "lib" 743# DOC "Location of cutil library" 744# NO_DEFAULT_PATH 745# ) 746# # Now search system paths 747# find_library(CUDA_CUT_LIBRARY NAMES cutil ${cuda_cutil_name} DOC "Location of cutil library") 748# mark_as_advanced(CUDA_CUT_LIBRARY) 749# set(CUDA_CUT_LIBRARIES ${CUDA_CUT_LIBRARY}) 750 751 752 753############################# 754# Check for required components 755set(CUDA_FOUND TRUE) 756 757set(CUDA_TOOLKIT_ROOT_DIR_INTERNAL "${CUDA_TOOLKIT_ROOT_DIR}" CACHE INTERNAL 758 "This is the value of the last time CUDA_TOOLKIT_ROOT_DIR was set successfully." FORCE) 759set(CUDA_SDK_ROOT_DIR_INTERNAL "${CUDA_SDK_ROOT_DIR}" CACHE INTERNAL 760 "This is the value of the last time CUDA_SDK_ROOT_DIR was set successfully." FORCE) 761 762include(FindPackageHandleStandardArgs) 763find_package_handle_standard_args(CUDA DEFAULT_MSG 764 CUDA_TOOLKIT_ROOT_DIR 765 CUDA_NVCC_EXECUTABLE 766 CUDA_INCLUDE_DIRS 767 CUDA_CUDART_LIBRARY 768 _cuda_version_acceptable 769 ) 770 771 772 773############################################################################### 774############################################################################### 775# Macros 776############################################################################### 777############################################################################### 778 779############################################################################### 780# Add include directories to pass to the nvcc command. 781macro(CUDA_INCLUDE_DIRECTORIES) 782 foreach(dir ${ARGN}) 783 list(APPEND CUDA_NVCC_INCLUDE_ARGS_USER -I${dir}) 784 endforeach(dir ${ARGN}) 785endmacro(CUDA_INCLUDE_DIRECTORIES) 786 787 788############################################################################## 789cuda_find_helper_file(parse_cubin cmake) 790cuda_find_helper_file(make2cmake cmake) 791cuda_find_helper_file(run_nvcc cmake) 792 793############################################################################## 794# Separate the OPTIONS out from the sources 795# 796macro(CUDA_GET_SOURCES_AND_OPTIONS _sources _cmake_options _options) 797 set( ${_sources} ) 798 set( ${_cmake_options} ) 799 set( ${_options} ) 800 set( _found_options FALSE ) 801 foreach(arg ${ARGN}) 802 if(arg STREQUAL "OPTIONS") 803 set( _found_options TRUE ) 804 elseif( 805 arg STREQUAL "WIN32" OR 806 arg STREQUAL "MACOSX_BUNDLE" OR 807 arg STREQUAL "EXCLUDE_FROM_ALL" OR 808 arg STREQUAL "STATIC" OR 809 arg STREQUAL "SHARED" OR 810 arg STREQUAL "MODULE" 811 ) 812 list(APPEND ${_cmake_options} ${arg}) 813 else() 814 if ( _found_options ) 815 list(APPEND ${_options} ${arg}) 816 else() 817 # Assume this is a file 818 list(APPEND ${_sources} ${arg}) 819 endif() 820 endif() 821 endforeach() 822endmacro() 823 824############################################################################## 825# Parse the OPTIONS from ARGN and set the variables prefixed by _option_prefix 826# 827macro(CUDA_PARSE_NVCC_OPTIONS _option_prefix) 828 set( _found_config ) 829 foreach(arg ${ARGN}) 830 # Determine if we are dealing with a perconfiguration flag 831 foreach(config ${CUDA_configuration_types}) 832 string(TOUPPER ${config} config_upper) 833 if (arg STREQUAL "${config_upper}") 834 set( _found_config _${arg}) 835 # Set arg to nothing to keep it from being processed further 836 set( arg ) 837 endif() 838 endforeach() 839 840 if ( arg ) 841 list(APPEND ${_option_prefix}${_found_config} "${arg}") 842 endif() 843 endforeach() 844endmacro() 845 846############################################################################## 847# Helper to add the include directory for CUDA only once 848function(CUDA_ADD_CUDA_INCLUDE_ONCE) 849 get_directory_property(_include_directories INCLUDE_DIRECTORIES) 850 set(_add TRUE) 851 if(_include_directories) 852 foreach(dir ${_include_directories}) 853 if("${dir}" STREQUAL "${CUDA_INCLUDE_DIRS}") 854 set(_add FALSE) 855 endif() 856 endforeach() 857 endif() 858 if(_add) 859 include_directories(${CUDA_INCLUDE_DIRS}) 860 endif() 861endfunction() 862 863function(CUDA_BUILD_SHARED_LIBRARY shared_flag) 864 set(cmake_args ${ARGN}) 865 # If SHARED, MODULE, or STATIC aren't already in the list of arguments, then 866 # add SHARED or STATIC based on the value of BUILD_SHARED_LIBS. 867 list(FIND cmake_args SHARED _cuda_found_SHARED) 868 list(FIND cmake_args MODULE _cuda_found_MODULE) 869 list(FIND cmake_args STATIC _cuda_found_STATIC) 870 if( _cuda_found_SHARED GREATER -1 OR 871 _cuda_found_MODULE GREATER -1 OR 872 _cuda_found_STATIC GREATER -1) 873 set(_cuda_build_shared_libs) 874 else() 875 if (BUILD_SHARED_LIBS) 876 set(_cuda_build_shared_libs SHARED) 877 else() 878 set(_cuda_build_shared_libs STATIC) 879 endif() 880 endif() 881 set(${shared_flag} ${_cuda_build_shared_libs} PARENT_SCOPE) 882endfunction() 883 884############################################################################## 885# Helper to avoid clashes of files with the same basename but different paths. 886# This doesn't attempt to do exactly what CMake internals do, which is to only 887# add this path when there is a conflict, since by the time a second collision 888# in names is detected it's already too late to fix the first one. For 889# consistency sake the relative path will be added to all files. 890function(CUDA_COMPUTE_BUILD_PATH path build_path) 891 #message("CUDA_COMPUTE_BUILD_PATH([${path}] ${build_path})") 892 # Only deal with CMake style paths from here on out 893 file(TO_CMAKE_PATH "${path}" bpath) 894 if (IS_ABSOLUTE "${bpath}") 895 # Absolute paths are generally unnessary, especially if something like 896 # FILE(GLOB_RECURSE) is used to pick up the files. 897 file(RELATIVE_PATH bpath "${CMAKE_CURRENT_SOURCE_DIR}" "${bpath}") 898 endif() 899 900 # This recipie is from cmLocalGenerator::CreateSafeUniqueObjectFileName in the 901 # CMake source. 902 903 # Remove leading / 904 string(REGEX REPLACE "^[/]+" "" bpath "${bpath}") 905 # Avoid absolute paths by removing ':' 906 string(REPLACE ":" "_" bpath "${bpath}") 907 # Avoid relative paths that go up the tree 908 string(REPLACE "../" "__/" bpath "${bpath}") 909 # Avoid spaces 910 string(REPLACE " " "_" bpath "${bpath}") 911 912 # Strip off the filename. I wait until here to do it, since removin the 913 # basename can make a path that looked like path/../basename turn into 914 # path/.. (notice the trailing slash). 915 get_filename_component(bpath "${bpath}" PATH) 916 917 set(${build_path} "${bpath}" PARENT_SCOPE) 918 #message("${build_path} = ${bpath}") 919endfunction() 920 921############################################################################## 922# This helper macro populates the following variables and setups up custom 923# commands and targets to invoke the nvcc compiler to generate C or PTX source 924# dependent upon the format parameter. The compiler is invoked once with -M 925# to generate a dependency file and a second time with -cuda or -ptx to generate 926# a .cpp or .ptx file. 927# INPUT: 928# cuda_target - Target name 929# format - PTX or OBJ 930# FILE1 .. FILEN - The remaining arguments are the sources to be wrapped. 931# OPTIONS - Extra options to NVCC 932# OUTPUT: 933# generated_files - List of generated files 934############################################################################## 935############################################################################## 936 937macro(CUDA_WRAP_SRCS cuda_target format generated_files) 938 939 if( ${format} MATCHES "PTX" ) 940 set( compile_to_ptx ON ) 941 elseif( ${format} MATCHES "OBJ") 942 set( compile_to_ptx OFF ) 943 else() 944 message( FATAL_ERROR "Invalid format flag passed to CUDA_WRAP_SRCS: '${format}'. Use OBJ or PTX.") 945 endif() 946 947 # Set up all the command line flags here, so that they can be overriden on a per target basis. 948 949 set(nvcc_flags "") 950 951 # Emulation if the card isn't present. 952 if (CUDA_BUILD_EMULATION) 953 # Emulation. 954 set(nvcc_flags ${nvcc_flags} --device-emulation -D_DEVICEEMU -g) 955 else(CUDA_BUILD_EMULATION) 956 # Device mode. No flags necessary. 957 endif(CUDA_BUILD_EMULATION) 958 959 if(CUDA_HOST_COMPILATION_CPP) 960 set(CUDA_C_OR_CXX CXX) 961 else(CUDA_HOST_COMPILATION_CPP) 962 if(CUDA_VERSION VERSION_LESS "3.0") 963 set(nvcc_flags ${nvcc_flags} --host-compilation C) 964 else() 965 message(WARNING "--host-compilation flag is deprecated in CUDA version >= 3.0. Removing --host-compilation C flag" ) 966 endif() 967 set(CUDA_C_OR_CXX C) 968 endif(CUDA_HOST_COMPILATION_CPP) 969 970 set(generated_extension ${CMAKE_${CUDA_C_OR_CXX}_OUTPUT_EXTENSION}) 971 972 if(CUDA_64_BIT_DEVICE_CODE) 973 set(nvcc_flags ${nvcc_flags} -m64) 974 else() 975 set(nvcc_flags ${nvcc_flags} -m32) 976 endif() 977 978 # This needs to be passed in at this stage, because VS needs to fill out the 979 # value of VCInstallDir from within VS. 980 if(CMAKE_GENERATOR MATCHES "Visual Studio") 981 if( CMAKE_SIZEOF_VOID_P EQUAL 8 ) 982 # Add nvcc flag for 64b Windows 983 set(ccbin_flags -D "\"CCBIN:PATH=$(VCInstallDir)bin\"" ) 984 endif() 985 endif() 986 987 # Figure out which configure we will use and pass that in as an argument to 988 # the script. We need to defer the decision until compilation time, because 989 # for VS projects we won't know if we are making a debug or release build 990 # until build time. 991 if(CMAKE_GENERATOR MATCHES "Visual Studio") 992 set( CUDA_build_configuration "$(ConfigurationName)" ) 993 else() 994 set( CUDA_build_configuration "${CMAKE_BUILD_TYPE}") 995 endif() 996 997 # Initialize our list of includes with the user ones followed by the CUDA system ones. 998 set(CUDA_NVCC_INCLUDE_ARGS ${CUDA_NVCC_INCLUDE_ARGS_USER} "-I${CUDA_INCLUDE_DIRS}") 999 # Get the include directories for this directory and use them for our nvcc command. 1000 get_directory_property(CUDA_NVCC_INCLUDE_DIRECTORIES INCLUDE_DIRECTORIES) 1001 if(CUDA_NVCC_INCLUDE_DIRECTORIES) 1002 foreach(dir ${CUDA_NVCC_INCLUDE_DIRECTORIES}) 1003 list(APPEND CUDA_NVCC_INCLUDE_ARGS -I${dir}) 1004 endforeach() 1005 endif() 1006 1007 # Reset these variables 1008 set(CUDA_WRAP_OPTION_NVCC_FLAGS) 1009 foreach(config ${CUDA_configuration_types}) 1010 string(TOUPPER ${config} config_upper) 1011 set(CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}) 1012 endforeach() 1013 1014 CUDA_GET_SOURCES_AND_OPTIONS(_cuda_wrap_sources _cuda_wrap_cmake_options _cuda_wrap_options ${ARGN}) 1015 CUDA_PARSE_NVCC_OPTIONS(CUDA_WRAP_OPTION_NVCC_FLAGS ${_cuda_wrap_options}) 1016 1017 # Figure out if we are building a shared library. BUILD_SHARED_LIBS is 1018 # respected in CUDA_ADD_LIBRARY. 1019 set(_cuda_build_shared_libs FALSE) 1020 # SHARED, MODULE 1021 list(FIND _cuda_wrap_cmake_options SHARED _cuda_found_SHARED) 1022 list(FIND _cuda_wrap_cmake_options MODULE _cuda_found_MODULE) 1023 if(_cuda_found_SHARED GREATER -1 OR _cuda_found_MODULE GREATER -1) 1024 set(_cuda_build_shared_libs TRUE) 1025 endif() 1026 # STATIC 1027 list(FIND _cuda_wrap_cmake_options STATIC _cuda_found_STATIC) 1028 if(_cuda_found_STATIC GREATER -1) 1029 set(_cuda_build_shared_libs FALSE) 1030 endif() 1031 1032 # CUDA_HOST_FLAGS 1033 if(_cuda_build_shared_libs) 1034 # If we are setting up code for a shared library, then we need to add extra flags for 1035 # compiling objects for shared libraries. 1036 set(CUDA_HOST_SHARED_FLAGS ${CMAKE_SHARED_LIBRARY_${CUDA_C_OR_CXX}_FLAGS}) 1037 else() 1038 set(CUDA_HOST_SHARED_FLAGS) 1039 endif() 1040 # Only add the CMAKE_{C,CXX}_FLAGS if we are propagating host flags. We 1041 # always need to set the SHARED_FLAGS, though. 1042 if(CUDA_PROPAGATE_HOST_FLAGS) 1043 set(CUDA_HOST_FLAGS "set(CMAKE_HOST_FLAGS ${CMAKE_${CUDA_C_OR_CXX}_FLAGS} ${CUDA_HOST_SHARED_FLAGS})") 1044 else() 1045 set(CUDA_HOST_FLAGS "set(CMAKE_HOST_FLAGS ${CUDA_HOST_SHARED_FLAGS})") 1046 endif() 1047 1048 set(CUDA_NVCC_FLAGS_CONFIG "# Build specific configuration flags") 1049 # Loop over all the configuration types to generate appropriate flags for run_nvcc.cmake 1050 foreach(config ${CUDA_configuration_types}) 1051 string(TOUPPER ${config} config_upper) 1052 # CMAKE_FLAGS are strings and not lists. By not putting quotes around CMAKE_FLAGS 1053 # we convert the strings to lists (like we want). 1054 1055 if(CUDA_PROPAGATE_HOST_FLAGS) 1056 # nvcc chokes on -g3 in versions previous to 3.0, so replace it with -g 1057 if(CMAKE_COMPILER_IS_GNUCC AND CUDA_VERSION VERSION_LESS "3.0") 1058 string(REPLACE "-g3" "-g" _cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}") 1059 else() 1060 set(_cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}") 1061 endif() 1062 1063 set(CUDA_HOST_FLAGS "${CUDA_HOST_FLAGS}\nset(CMAKE_HOST_FLAGS_${config_upper} ${_cuda_C_FLAGS})") 1064 endif() 1065 1066 # Note that if we ever want CUDA_NVCC_FLAGS_<CONFIG> to be string (instead of a list 1067 # like it is currently), we can remove the quotes around the 1068 # ${CUDA_NVCC_FLAGS_${config_upper}} variable like the CMAKE_HOST_FLAGS_<CONFIG> variable. 1069 set(CUDA_NVCC_FLAGS_CONFIG "${CUDA_NVCC_FLAGS_CONFIG}\nset(CUDA_NVCC_FLAGS_${config_upper} ${CUDA_NVCC_FLAGS_${config_upper}} ;; ${CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}})") 1070 endforeach() 1071 1072 if(compile_to_ptx) 1073 # Don't use any of the host compilation flags for PTX targets. 1074 set(CUDA_HOST_FLAGS) 1075 set(CUDA_NVCC_FLAGS_CONFIG) 1076 endif() 1077 1078 # Get the list of definitions from the directory property 1079 get_directory_property(CUDA_NVCC_DEFINITIONS COMPILE_DEFINITIONS) 1080 if(CUDA_NVCC_DEFINITIONS) 1081 foreach(_definition ${CUDA_NVCC_DEFINITIONS}) 1082 list(APPEND nvcc_flags "-D${_definition}") 1083 endforeach() 1084 endif() 1085 1086 if(_cuda_build_shared_libs) 1087 list(APPEND nvcc_flags "-D${cuda_target}_EXPORTS") 1088 endif() 1089 1090 # Reset the output variable 1091 set(_cuda_wrap_generated_files "") 1092 1093 # Iterate over the macro arguments and create custom 1094 # commands for all the .cu files. 1095 foreach(file ${ARGN}) 1096 # Ignore any file marked as a HEADER_FILE_ONLY 1097 get_source_file_property(_is_header ${file} HEADER_FILE_ONLY) 1098 if(${file} MATCHES ".*\\.cu$" AND NOT _is_header) 1099 1100 # Determine output directory 1101 cuda_compute_build_path("${file}" cuda_build_path) 1102 set(cuda_compile_intermediate_directory "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${cuda_target}.dir/${cuda_build_path}") 1103 if(CUDA_GENERATED_OUTPUT_DIR) 1104 set(cuda_compile_output_dir "${CUDA_GENERATED_OUTPUT_DIR}") 1105 else() 1106 if ( compile_to_ptx ) 1107 set(cuda_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}") 1108 else() 1109 set(cuda_compile_output_dir "${cuda_compile_intermediate_directory}") 1110 endif() 1111 endif() 1112 1113 # Add a custom target to generate a c or ptx file. ###################### 1114 1115 get_filename_component( basename ${file} NAME ) 1116 if( compile_to_ptx ) 1117 set(generated_file_path "${cuda_compile_output_dir}") 1118 set(generated_file_basename "${cuda_target}_generated_${basename}.ptx") 1119 set(format_flag "-ptx") 1120 file(MAKE_DIRECTORY "${cuda_compile_output_dir}") 1121 else( compile_to_ptx ) 1122 set(generated_file_path "${cuda_compile_output_dir}/${CMAKE_CFG_INTDIR}") 1123 set(generated_file_basename "${cuda_target}_generated_${basename}${generated_extension}") 1124 set(format_flag "-c") 1125 endif( compile_to_ptx ) 1126 1127 # Set all of our file names. Make sure that whatever filenames that have 1128 # generated_file_path in them get passed in through as a command line 1129 # argument, so that the ${CMAKE_CFG_INTDIR} gets expanded at run time 1130 # instead of configure time. 1131 set(generated_file "${generated_file_path}/${generated_file_basename}") 1132 set(cmake_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.depend") 1133 set(NVCC_generated_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.NVCC-depend") 1134 set(generated_cubin_file "${generated_file_path}/${generated_file_basename}.cubin.txt") 1135 set(custom_target_script "${cuda_compile_intermediate_directory}/${generated_file_basename}.cmake") 1136 1137 # Setup properties for obj files: 1138 if( NOT compile_to_ptx ) 1139 set_source_files_properties("${generated_file}" 1140 PROPERTIES 1141 EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked. 1142 ) 1143 endif() 1144 1145 # Don't add CMAKE_CURRENT_SOURCE_DIR if the path is already an absolute path. 1146 get_filename_component(file_path "${file}" PATH) 1147 if(IS_ABSOLUTE "${file_path}") 1148 set(source_file "${file}") 1149 else() 1150 set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}") 1151 endif() 1152 1153 # Bring in the dependencies. Creates a variable CUDA_NVCC_DEPEND ####### 1154 cuda_include_nvcc_dependencies(${cmake_dependency_file}) 1155 1156 # Convience string for output ########################################### 1157 if(CUDA_BUILD_EMULATION) 1158 set(cuda_build_type "Emulation") 1159 else(CUDA_BUILD_EMULATION) 1160 set(cuda_build_type "Device") 1161 endif(CUDA_BUILD_EMULATION) 1162 1163 # Build the NVCC made dependency file ################################### 1164 set(build_cubin OFF) 1165 if ( NOT CUDA_BUILD_EMULATION AND CUDA_BUILD_CUBIN ) 1166 if ( NOT compile_to_ptx ) 1167 set ( build_cubin ON ) 1168 endif( NOT compile_to_ptx ) 1169 endif( NOT CUDA_BUILD_EMULATION AND CUDA_BUILD_CUBIN ) 1170 1171 # Configure the build script 1172 configure_file("${CUDA_run_nvcc}" "${custom_target_script}" @ONLY) 1173 1174 # So if a user specifies the same cuda file as input more than once, you 1175 # can have bad things happen with dependencies. Here we check an option 1176 # to see if this is the behavior they want. 1177 if(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE) 1178 set(main_dep MAIN_DEPENDENCY ${source_file}) 1179 else() 1180 set(main_dep DEPENDS ${source_file}) 1181 endif() 1182 1183 if(CUDA_VERBOSE_BUILD) 1184 set(verbose_output ON) 1185 elseif(CMAKE_GENERATOR MATCHES "Makefiles") 1186 set(verbose_output "$(VERBOSE)") 1187 else() 1188 set(verbose_output OFF) 1189 endif() 1190 1191 # Create up the comment string 1192 file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}") 1193 if(compile_to_ptx) 1194 set(cuda_build_comment_string "Building NVCC ptx file ${generated_file_relative_path}") 1195 else() 1196 set(cuda_build_comment_string "Building NVCC (${cuda_build_type}) object ${generated_file_relative_path}") 1197 endif() 1198 1199 # Build the generated file and dependency file ########################## 1200 add_custom_command( 1201 OUTPUT ${generated_file} 1202 # These output files depend on the source_file and the contents of cmake_dependency_file 1203 ${main_dep} 1204 DEPENDS ${CUDA_NVCC_DEPEND} 1205 DEPENDS ${custom_target_script} 1206 # Make sure the output directory exists before trying to write to it. 1207 COMMAND ${CMAKE_COMMAND} -E make_directory "${generated_file_path}" 1208 COMMAND ${CMAKE_COMMAND} ARGS 1209 -D verbose:BOOL=${verbose_output} 1210 ${ccbin_flags} 1211 -D build_configuration:STRING=${CUDA_build_configuration} 1212 -D "generated_file:STRING=${generated_file}" 1213 -D "generated_cubin_file:STRING=${generated_cubin_file}" 1214 -P "${custom_target_script}" 1215 WORKING_DIRECTORY "${cuda_compile_intermediate_directory}" 1216 COMMENT "${cuda_build_comment_string}" 1217 ) 1218 1219 # Make sure the build system knows the file is generated. 1220 set_source_files_properties(${generated_file} PROPERTIES GENERATED TRUE) 1221 1222 # Don't add the object file to the list of generated files if we are using 1223 # visual studio and we are attaching the build rule to the cuda file. VS 1224 # will add our object file to the linker automatically for us. 1225 set(cuda_add_generated_file TRUE) 1226 1227 if(NOT compile_to_ptx AND CMAKE_GENERATOR MATCHES "Visual Studio" AND CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE) 1228 # Visual Studio 8 crashes when you close the solution when you don't add the object file. 1229 if(NOT CMAKE_GENERATOR MATCHES "Visual Studio 8") 1230 #message("Not adding ${generated_file}") 1231 set(cuda_add_generated_file FALSE) 1232 endif() 1233 endif() 1234 1235 if(cuda_add_generated_file) 1236 list(APPEND _cuda_wrap_generated_files ${generated_file}) 1237 endif() 1238 1239 # Add the other files that we want cmake to clean on a cleanup ########## 1240 list(APPEND CUDA_ADDITIONAL_CLEAN_FILES "${cmake_dependency_file}") 1241 list(REMOVE_DUPLICATES CUDA_ADDITIONAL_CLEAN_FILES) 1242 set(CUDA_ADDITIONAL_CLEAN_FILES ${CUDA_ADDITIONAL_CLEAN_FILES} CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.") 1243 1244 endif(${file} MATCHES ".*\\.cu$" AND NOT _is_header) 1245 endforeach(file) 1246 1247 # Set the return parameter 1248 set(${generated_files} ${_cuda_wrap_generated_files}) 1249endmacro(CUDA_WRAP_SRCS) 1250 1251 1252############################################################################### 1253############################################################################### 1254# ADD LIBRARY 1255############################################################################### 1256############################################################################### 1257macro(CUDA_ADD_LIBRARY cuda_target) 1258 1259 CUDA_ADD_CUDA_INCLUDE_ONCE() 1260 1261 # Separate the sources from the options 1262 CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN}) 1263 CUDA_BUILD_SHARED_LIBRARY(_cuda_shared_flag ${ARGN}) 1264 # Create custom commands and targets for each file. 1265 CUDA_WRAP_SRCS( ${cuda_target} OBJ _generated_files ${_sources} 1266 ${_cmake_options} ${_cuda_shared_flag} 1267 OPTIONS ${_options} ) 1268 1269 # Add the library. 1270 add_library(${cuda_target} ${_cmake_options} 1271 ${_generated_files} 1272 ${_sources} 1273 ) 1274 1275 target_link_libraries(${cuda_target} 1276 ${CUDA_LIBRARIES} 1277 ) 1278 1279 # We need to set the linker language based on what the expected generated file 1280 # would be. CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP. 1281 set_target_properties(${cuda_target} 1282 PROPERTIES 1283 LINKER_LANGUAGE ${CUDA_C_OR_CXX} 1284 ) 1285 1286endmacro(CUDA_ADD_LIBRARY cuda_target) 1287 1288 1289############################################################################### 1290############################################################################### 1291# ADD EXECUTABLE 1292############################################################################### 1293############################################################################### 1294macro(CUDA_ADD_EXECUTABLE cuda_target) 1295 1296 CUDA_ADD_CUDA_INCLUDE_ONCE() 1297 1298 # Separate the sources from the options 1299 CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN}) 1300 # Create custom commands and targets for each file. 1301 CUDA_WRAP_SRCS( ${cuda_target} OBJ _generated_files ${_sources} OPTIONS ${_options} ) 1302 1303 # Add the library. 1304 add_executable(${cuda_target} ${_cmake_options} 1305 ${_generated_files} 1306 ${_sources} 1307 ) 1308 1309 target_link_libraries(${cuda_target} 1310 ${CUDA_LIBRARIES} 1311 ) 1312 1313 # We need to set the linker language based on what the expected generated file 1314 # would be. CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP. 1315 set_target_properties(${cuda_target} 1316 PROPERTIES 1317 LINKER_LANGUAGE ${CUDA_C_OR_CXX} 1318 ) 1319 1320endmacro(CUDA_ADD_EXECUTABLE cuda_target) 1321 1322 1323############################################################################### 1324############################################################################### 1325# CUDA COMPILE 1326############################################################################### 1327############################################################################### 1328macro(CUDA_COMPILE generated_files) 1329 1330 # Separate the sources from the options 1331 CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN}) 1332 # Create custom commands and targets for each file. 1333 CUDA_WRAP_SRCS( cuda_compile OBJ _generated_files ${_sources} ${_cmake_options} 1334 OPTIONS ${_options} ) 1335 1336 set( ${generated_files} ${_generated_files}) 1337 1338endmacro(CUDA_COMPILE) 1339 1340 1341############################################################################### 1342############################################################################### 1343# CUDA COMPILE PTX 1344############################################################################### 1345############################################################################### 1346macro(CUDA_COMPILE_PTX generated_files) 1347 1348 # Separate the sources from the options 1349 CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN}) 1350 # Create custom commands and targets for each file. 1351 CUDA_WRAP_SRCS( cuda_compile_ptx PTX _generated_files ${_sources} ${_cmake_options} 1352 OPTIONS ${_options} ) 1353 1354 set( ${generated_files} ${_generated_files}) 1355 1356endmacro(CUDA_COMPILE_PTX) 1357 1358############################################################################### 1359############################################################################### 1360# CUDA ADD CUFFT TO TARGET 1361############################################################################### 1362############################################################################### 1363macro(CUDA_ADD_CUFFT_TO_TARGET target) 1364 if (CUDA_BUILD_EMULATION) 1365 target_link_libraries(${target} ${CUDA_cufftemu_LIBRARY}) 1366 else() 1367 target_link_libraries(${target} ${CUDA_cufft_LIBRARY}) 1368 endif() 1369endmacro() 1370 1371############################################################################### 1372############################################################################### 1373# CUDA ADD CUBLAS TO TARGET 1374############################################################################### 1375############################################################################### 1376macro(CUDA_ADD_CUBLAS_TO_TARGET target) 1377 if (CUDA_BUILD_EMULATION) 1378 target_link_libraries(${target} ${CUDA_cublasemu_LIBRARY}) 1379 else() 1380 target_link_libraries(${target} ${CUDA_cublas_LIBRARY}) 1381 endif() 1382endmacro() 1383 1384############################################################################### 1385############################################################################### 1386# CUDA BUILD CLEAN TARGET 1387############################################################################### 1388############################################################################### 1389macro(CUDA_BUILD_CLEAN_TARGET) 1390 # Call this after you add all your CUDA targets, and you will get a convience 1391 # target. You should also make clean after running this target to get the 1392 # build system to generate all the code again. 1393 1394 set(cuda_clean_target_name clean_cuda_depends) 1395 if (CMAKE_GENERATOR MATCHES "Visual Studio") 1396 string(TOUPPER ${cuda_clean_target_name} cuda_clean_target_name) 1397 endif() 1398 add_custom_target(${cuda_clean_target_name} 1399 COMMAND ${CMAKE_COMMAND} -E remove ${CUDA_ADDITIONAL_CLEAN_FILES}) 1400 1401 # Clear out the variable, so the next time we configure it will be empty. 1402 # This is useful so that the files won't persist in the list after targets 1403 # have been removed. 1404 set(CUDA_ADDITIONAL_CLEAN_FILES "" CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.") 1405endmacro(CUDA_BUILD_CLEAN_TARGET) 1406