From 1eba5d772d768ce06ada5fbf2422a2bd4eae4fe0 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn
Date: Fri, 4 Apr 2014 10:21:05 +0000
Subject: Detect arch/gencode options for nvcc

---
Reviewer note (this region is ignored by patch tools):
  * ASTRA_FIND_NVCC_ARCHS compiles a small test program once per candidate
    architecture with -gencode=arch=compute_N,code=sm_N. Every accepted
    architecture is appended to the flags variable passed as the second
    argument; the last accepted one is additionally emitted with
    code=compute_N. The third argument receives the accepted list, or
    "none" if no candidate compiled.
  * The test program uses std::cout/std::endl, so its #include must name
    <iostream>; that operand was missing from the flattened copy of this
    patch and has been restored.
  * Line breaks and indentation were reconstructed from a flattened copy.
    If context lines fail to match, apply with --ignore-whitespace.

 build/linux/acinclude.m4 | 34 ++++++++++++++++++++++++++++++++++
 build/linux/configure.ac |  8 ++++++++
 2 files changed, 42 insertions(+)

diff --git a/build/linux/acinclude.m4 b/build/linux/acinclude.m4
index 5027e85..b6c1b2d 100644
--- a/build/linux/acinclude.m4
+++ b/build/linux/acinclude.m4
@@ -72,3 +72,37 @@ $NVCC -c -o conftest.o conftest.cu $$2 >conftest.nvcc.out 2>&1 || {
 }
 rm -f conftest.cu conftest.o conftest.nvcc.out
 ])
+
+dnl ASTRA_FIND_NVCC_ARCHS(archs-to-try,cppflags-to-extend,output-list)
+dnl Architectures should be of the form 10, 20, 30, 35,
+dnl and should be in order. The last accepted one will be used for PTX output.
+dnl All accepted ones will be used for cubin output.
+AC_DEFUN([ASTRA_FIND_NVCC_ARCHS],[
+cat >conftest.cu <<_ACEOF
+#include <iostream>
+int main() {
+  std::cout << "Test" << std::endl;
+  return 0;
+}
+_ACEOF
+NVCC_lastarch="none"
+NVCC_extra=""
+NVCC_list=""
+for arch in $1; do
+  NVCC_opt="-gencode=arch=compute_$arch,code=sm_$arch"
+  $NVCC -c -o conftest.o conftest.cu $$2 $NVCC_opt >conftest.nvcc.out 2>&1 && {
+    NVCC_lastarch=$arch
+    NVCC_extra="$NVCC_extra $NVCC_opt"
+    NVCC_list="$NVCC_list $arch"
+  }
+done
+if test $NVCC_lastarch != none; then
+  NVCC_extra="$NVCC_extra -gencode=arch=compute_${NVCC_lastarch},code=compute_${NVCC_lastarch}"
+  $3="$NVCC_list"
+  $2="$$2 $NVCC_extra"
+else
+  $3="none"
+fi
+rm -f conftest.cu conftest.o conftest.nvcc.out
+])
+
diff --git a/build/linux/configure.ac b/build/linux/configure.ac
index ad1d36c..f58ad0e 100644
--- a/build/linux/configure.ac
+++ b/build/linux/configure.ac
@@ -133,6 +133,14 @@ if test x"$HAVECUDA" = xyes; then
   ASTRA_CHECK_NVCC(HAVECUDA,NVCCFLAGS)
 fi
 AC_MSG_RESULT($HAVECUDA)
+
+if test x"$HAVECUDA" = xyes; then
+  AC_MSG_CHECKING([for nvcc archs])
+  ASTRA_FIND_NVCC_ARCHS([10 12 20 30 35],NVCCFLAGS,NVCCARCHS)
+  AC_MSG_RESULT($NVCCARCHS)
+fi
+
+
 AC_SUBST(HAVECUDA)
 AC_SUBST(LDFLAGS_CUDA)
 AC_SUBST(CPPFLAGS_CUDA)
-- 
cgit v1.2.3