对@orthosteroid的答案有一点改进,这几乎可以确保生成一个唯一的临时文件,并且只需要一个而不是两个临时文件。
下面是
scripts/get_cuda_sm.sh
:
#!/bin/bash
#
# Prints the compute capability of the first CUDA device installed
# on the system, or alternatively the device whose index is the
# first command-line argument
device_index=${1:-0}
timestamp=$(date +%s.%N)
gcc_binary=$(which g++)
gcc_binary=${gcc_binary:-g++}
cuda_root=${CUDA_DIR:-/usr/local/cuda}
CUDA_INCLUDE_DIRS=${CUDA_INCLUDE_DIRS:-${cuda_root}/include}
CUDA_CUDART_LIBRARY=${CUDA_CUDART_LIBRARY:-${cuda_root}/lib64/libcudart.so}
generated_binary="/tmp/cuda-compute-version-helper-$$-$timestamp"
# create a 'here document' that is code we compile and use to probe the card
source_code="$(cat << EOF
#include <stdio.h>
#include <cuda_runtime_api.h>
int main()
{
cudaDeviceProp prop;
cudaError_t status;
int device_count;
status = cudaGetDeviceCount(&device_count);
if (status != cudaSuccess) {
fprintf(stderr,"cudaGetDeviceCount() failed: %s\n", cudaGetErrorString(status));
return -1;
}
if (${device_index} >= device_count) {
fprintf(stderr, "Specified device index %d exceeds the maximum (the device count on this system is %d)\n", ${device_index}, device_count);
return -1;
}
status = cudaGetDeviceProperties(&prop, ${device_index});
if (status != cudaSuccess) {
fprintf(stderr,"cudaGetDeviceProperties() for device ${device_index} failed: %s\n", cudaGetErrorString(status));
return -1;
}
int v = prop.major * 10 + prop.minor;
printf("%d\\n", v);
}
EOF
)"
echo "$source_code" | $gcc_binary -x c++ -I"$CUDA_INCLUDE_DIRS" -o "$generated_binary" - -x none "$CUDA_CUDART_LIBRARY"
# probe the card and cleanup
$generated_binary
rm $generated_binary
下面是
CMakeLists.txt
或CMake模块:
if (NOT CUDA_TARGET_COMPUTE_CAPABILITY)
if("$ENV{CUDA_SM}" STREQUAL "")
set(ENV{CUDA_INCLUDE_DIRS} "${CUDA_INCLUDE_DIRS}")
set(ENV{CUDA_CUDART_LIBRARY} "${CUDA_CUDART_LIBRARY}")
set(ENV{CMAKE_CXX_COMPILER} "${CMAKE_CXX_COMPILER}")
execute_process(COMMAND
bash -c "${CMAKE_CURRENT_SOURCE_DIR}/scripts/get_cuda_sm.sh"
OUTPUT_VARIABLE CUDA_TARGET_COMPUTE_CAPABILITY_)
else()
set(CUDA_TARGET_COMPUTE_CAPABILITY_ $ENV{CUDA_SM})
endif()
set(CUDA_TARGET_COMPUTE_CAPABILITY "${CUDA_TARGET_COMPUTE_CAPABILITY_}"
CACHE STRING "CUDA compute capability of the (first) CUDA device on \
the system, in XY format (like the X.Y format but no dot); see table \
of features and capabilities by capability X.Y value at \
https://en.wikipedia.org/wiki/CUDA#Version_features_and_specifications")
execute_process(COMMAND
bash -c "echo -n $(echo ${CUDA_TARGET_COMPUTE_CAPABILITY})"
OUTPUT_VARIABLE CUDA_TARGET_COMPUTE_CAPABILITY)
execute_process(COMMAND
bash -c "echo ${CUDA_TARGET_COMPUTE_CAPABILITY} | sed 's/^\\([0-9]\\)\\([0-9]\\)/\\1.\\2/;' | xargs echo -n"
OUTPUT_VARIABLE FORMATTED_COMPUTE_CAPABILITY)
message(STATUS
"CUDA device-side code will assume compute capability \
${FORMATTED_COMPUTE_CAPABILITY}")
endif()
set(CUDA_GENCODE
"arch=compute_${CUDA_TARGET_COMPUTE_CAPABILITY}, code=compute_${CUDA_TARGET_COMPUTE_CAPABILITY}")
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode ${CUDA_GENCODE} )