diff options
| author | Willem Jan Palenstijn <wjp@usecode.org> | 2016-02-02 14:25:01 +0100 | 
|---|---|---|
| committer | Willem Jan Palenstijn <wjp@usecode.org> | 2016-02-02 14:25:01 +0100 | 
| commit | 278618e2f70f6b1308bf9c0aa79f76137cc582c1 (patch) | |
| tree | c7f402b60e4baa529149a44bbe3fa2570ed610b2 | |
| parent | cd12edcd3ada212f27088a4e26e3eee5f89bcbad (diff) | |
| parent | b529ff854f0e1191108e31d6be294d31b50c666e (diff) | |
| download | astra-278618e2f70f6b1308bf9c0aa79f76137cc582c1.tar.gz astra-278618e2f70f6b1308bf9c0aa79f76137cc582c1.tar.bz2 astra-278618e2f70f6b1308bf9c0aa79f76137cc582c1.tar.xz astra-278618e2f70f6b1308bf9c0aa79f76137cc582c1.zip | |
Merge pull request #107 from wjp/multi
Add multi-GPU support to CompositeGeometryManager
| -rw-r--r-- | cuda/3d/mem3d.h | 2 | ||||
| -rw-r--r-- | include/astra/CompositeGeometryManager.h | 16 | ||||
| -rw-r--r-- | matlab/mex/astra_mex_c.cpp | 45 | ||||
| -rw-r--r-- | python/astra/__init__.py | 8 | ||||
| -rw-r--r-- | python/astra/astra.py | 4 | ||||
| -rw-r--r-- | python/astra/astra_c.pyx | 21 | ||||
| -rw-r--r-- | samples/matlab/s020_3d_multiGPU.m | 38 | ||||
| -rw-r--r-- | samples/python/s019_experimental_multires.py (renamed from samples/python/s018_experimental_multires.py) | 0 | ||||
| -rw-r--r-- | samples/python/s020_3d_multiGPU.py | 57 | ||||
| -rw-r--r-- | src/CompositeGeometryManager.cpp | 476 | ||||
| -rw-r--r-- | src/ConeProjectionGeometry3D.cpp | 3 | 
11 files changed, 518 insertions, 152 deletions
| diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h index 82bad19..acb72cb 100644 --- a/cuda/3d/mem3d.h +++ b/cuda/3d/mem3d.h @@ -87,6 +87,8 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos)  bool freeGPUMemory(MemHandle3D handle); +bool setGPUIndex(int index); +  bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel); diff --git a/include/astra/CompositeGeometryManager.h b/include/astra/CompositeGeometryManager.h index 6610151..49d02a7 100644 --- a/include/astra/CompositeGeometryManager.h +++ b/include/astra/CompositeGeometryManager.h @@ -50,9 +50,16 @@ class CProjectionGeometry3D;  class CProjector3D; +struct SGPUParams { +	std::vector<int> GPUIndices; +	size_t memory; +}; +  class _AstraExport CCompositeGeometryManager {  public: +	CCompositeGeometryManager(); +  	class CPart;  	typedef std::list<boost::shared_ptr<CPart> > TPartList;  	class CPart { @@ -139,10 +146,19 @@ public:  	bool doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData);  	bool doBP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData); +	void setGPUIndices(const std::vector<int>& GPUIndices); + +	static void setGlobalGPUParams(const SGPUParams& params); +  protected:  	bool splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split); +	std::vector<int> m_GPUIndices; +	size_t m_iMaxSize; + + +	static SGPUParams* s_params;  };  } diff --git a/matlab/mex/astra_mex_c.cpp b/matlab/mex/astra_mex_c.cpp index d34334c..fdf4f33 100644 --- a/matlab/mex/astra_mex_c.cpp +++ b/matlab/mex/astra_mex_c.cpp @@ -38,6 +38,7 @@ $Id$  #include "astra/Globals.h"  #ifdef ASTRA_CUDA  #include "../cuda/2d/darthelper.h" +#include "astra/CompositeGeometryManager.h"  #endif  using namespace std;  using namespace astra; @@ -83,12 +84,46 @@ void astra_mex_use_cuda(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs   * Set active GPU   */  void astra_mex_set_gpu_index(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) -{  +{  #ifdef ASTRA_CUDA -	if (nrhs >= 2) { -		bool ret = astraCUDA::setGPUIndex((int)mxGetScalar(prhs[1])); -		if (!ret) -			mexPrintf("Failed to set GPU %d\n", (int)mxGetScalar(prhs[1])); +	bool usage = false; +	if (nrhs != 2 && nrhs != 4) { +		usage = true; +	} + +	astra::SGPUParams params; +	params.memory = 0; + +	if (!usage && nrhs >= 4) { +		std::string s = mexToString(prhs[2]); +		if (s != "memory") { +			usage = true; +		} else { +			params.memory = (size_t)mxGetScalar(prhs[3]); +		} +	} + +	if (!usage && nrhs >= 2) { +		int n = mxGetN(prhs[1]) * mxGetM(prhs[1]); +		params.GPUIndices.resize(n); +		double* pdMatlabData = mxGetPr(prhs[1]); +		for (int i = 0; i < n; ++i) +			params.GPUIndices[i] = (int)pdMatlabData[i]; + + +		astra::CCompositeGeometryManager::setGlobalGPUParams(params); + + +		// Set first GPU +		if (n >= 1) { +			bool ret = astraCUDA::setGPUIndex((int)pdMatlabData[0]); +			if (!ret) +				mexPrintf("Failed to set GPU %d\n", (int)pdMatlabData[0]); +		} +	} + +	if (usage) { +		mexPrintf("Usage: astra_mex('set_gpu_index', index/indices [, 'memory', memory])");  	}  #endif  } diff --git a/python/astra/__init__.py b/python/astra/__init__.py index 10ed74d..515d9a2 100644 --- a/python/astra/__init__.py +++ b/python/astra/__init__.py @@ -39,7 +39,7 @@ from . import log  from .optomo import OpTomo  import os -try: -    astra.set_gpu_index(int(os.environ['ASTRA_GPU_INDEX'])) -except KeyError: -    pass + +if 'ASTRA_GPU_INDEX' in os.environ: +    L = [ int(x) for x in os.environ['ASTRA_GPU_INDEX'].split(',') ] +    astra.set_gpu_index(L) diff --git a/python/astra/astra.py b/python/astra/astra.py index 26b1ff0..9328b6b 100644 --- a/python/astra/astra.py +++ b/python/astra/astra.py @@ -49,10 +49,10 @@ def version(printToScreen=False):      """      return a.version(printToScreen) -def set_gpu_index(idx): +def set_gpu_index(idx, memory=0):      """Set default GPU index to use.      :param idx: GPU index      :type idx: :class:`int`      """ -    a.set_gpu_index(idx) +    a.set_gpu_index(idx, memory) diff --git a/python/astra/astra_c.pyx b/python/astra/astra_c.pyx index 6b246b6..2a9c816 100644 --- a/python/astra/astra_c.pyx +++ b/python/astra/astra_c.pyx @@ -31,6 +31,7 @@ import six  from .utils import wrap_from_bytes  from libcpp.string cimport string +from libcpp.vector cimport vector  from libcpp cimport bool  cdef extern from "astra/Globals.h" namespace "astra":      int getVersion() @@ -43,6 +44,12 @@ IF HAVE_CUDA==True:  ELSE:    def setGPUIndex():      pass +cdef extern from "astra/CompositeGeometryManager.h" namespace "astra": +    cdef cppclass SGPUParams: +        vector[int] GPUIndices +        size_t memory +cdef extern from "astra/CompositeGeometryManager.h" namespace "astra::CCompositeGeometryManager": +    void setGlobalGPUParams(SGPUParams&)  def credits():      six.print_("""The ASTRA Toolbox has been developed at the University of Antwerp and CWI, Amsterdam by @@ -70,8 +77,16 @@ def version(printToScreen=False):      else:          return getVersion() -def set_gpu_index(idx): +def set_gpu_index(idx, memory=0): +    import types +    import collections +    cdef SGPUParams params      if use_cuda()==True: -        ret = setGPUIndex(idx) +        if not isinstance(idx, collections.Iterable) or isinstance(idx, types.StringTypes): +            idx = (idx,) +        params.memory = memory +        params.GPUIndices = idx +        setGlobalGPUParams(params) +        ret = setGPUIndex(params.GPUIndices[0])          if not ret: -            six.print_("Failed to set GPU " + str(idx)) +            six.print_("Failed to set GPU " + str(params.GPUIndices[0])) diff --git a/samples/matlab/s020_3d_multiGPU.m b/samples/matlab/s020_3d_multiGPU.m new file mode 100644 index 0000000..bade325 --- /dev/null +++ b/samples/matlab/s020_3d_multiGPU.m @@ -0,0 +1,38 @@ +% ----------------------------------------------------------------------- +% This file is part of the ASTRA Toolbox +%  +% Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp +%            2014-2015, CWI, Amsterdam +% License: Open Source under GPLv3 +% Contact: astra@uantwerpen.be +% Website: http://sf.net/projects/astra-toolbox +% ----------------------------------------------------------------------- + + +% Set up multi-GPU usage. +% This only works for 3D GPU forward projection and back projection. +astra_mex('set_gpu_index', [0 1]); + +% Optionally, you can also restrict the amount of GPU memory ASTRA will use. +% The line commented below sets this to 1GB. +%astra_mex('set_gpu_index', [0 1], 'memory', 1024*1024*1024); + +vol_geom = astra_create_vol_geom(1024, 1024, 1024); + +angles = linspace2(0, pi, 1024); +proj_geom = astra_create_proj_geom('parallel3d', 1.0, 1.0, 1024, 1024, angles); + +% Create a simple hollow cube phantom +cube = zeros(1024,1024,1024); +cube(129:896,129:896,129:896) = 1; +cube(257:768,257:768,257:768) = 0; + +% Create projection data from this +[proj_id, proj_data] = astra_create_sino3d_cuda(cube, proj_geom, vol_geom); + +% Backproject projection data +[bproj_id, bproj_data] = astra_create_backprojection3d_cuda(proj_data, proj_geom, vol_geom); + +astra_mex_data3d('delete', proj_id); +astra_mex_data3d('delete', bproj_id); + diff --git a/samples/python/s018_experimental_multires.py b/samples/python/s019_experimental_multires.py index cf38e53..cf38e53 100644 --- a/samples/python/s018_experimental_multires.py +++ b/samples/python/s019_experimental_multires.py diff --git a/samples/python/s020_3d_multiGPU.py b/samples/python/s020_3d_multiGPU.py new file mode 100644 index 0000000..d6799c4 --- /dev/null +++ b/samples/python/s020_3d_multiGPU.py @@ -0,0 +1,57 @@ +#----------------------------------------------------------------------- +#Copyright 2013 Centrum Wiskunde & Informatica, Amsterdam +# +#Author: Daniel M. Pelt +#Contact: D.M.Pelt@cwi.nl +#Website: http://dmpelt.github.io/pyastratoolbox/ +# +# +#This file is part of the Python interface to the +#All Scale Tomographic Reconstruction Antwerp Toolbox ("ASTRA Toolbox"). +# +#The Python interface to the ASTRA Toolbox is free software: you can redistribute it and/or modify +#it under the terms of the GNU General Public License as published by +#the Free Software Foundation, either version 3 of the License, or +#(at your option) any later version. +# +#The Python interface to the ASTRA Toolbox is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +#GNU General Public License for more details. +# +#You should have received a copy of the GNU General Public License +#along with the Python interface to the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. +# +#----------------------------------------------------------------------- + +import astra +import numpy as np + +# Set up multi-GPU usage. +# This only works for 3D GPU forward projection and back projection. +astra.astra.set_gpu_index([0,1]) + +# Optionally, you can also restrict the amount of GPU memory ASTRA will use. +# The line commented below sets this to 1GB. +#astra.astra.set_gpu_index([0,1], memory=1024*1024*1024) + +vol_geom = astra.create_vol_geom(1024, 1024, 1024) + +angles = np.linspace(0, np.pi, 1024,False) +proj_geom = astra.create_proj_geom('parallel3d', 1.0, 1.0, 1024, 1024, angles) + +# Create a simple hollow cube phantom +cube = np.zeros((1024,1024,1024)) +cube[128:895,128:895,128:895] = 1 +cube[256:767,256:767,256:767] = 0 + +# Create projection data from this +proj_id, proj_data = astra.create_sino3d_gpu(cube, proj_geom, vol_geom) + +# Backproject projection data +bproj_id, bproj_data = astra.create_backprojection3d_gpu(proj_data, proj_geom, vol_geom) + +# Clean up. Note that GPU memory is tied up in the algorithm object, +# and main RAM in the data objects. +astra.data3d.delete(proj_id) +astra.data3d.delete(bproj_id) diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp index 41f6319..d1b713e 100644 --- a/src/CompositeGeometryManager.cpp +++ b/src/CompositeGeometryManager.cpp @@ -44,11 +44,31 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.  #include "../cuda/3d/mem3d.h"  #include <cstring> +#include <sstream> + +#ifndef USE_PTHREADS +#include <boost/thread/mutex.hpp> +#include <boost/thread.hpp> +#endif  namespace astra { + +SGPUParams* CCompositeGeometryManager::s_params = 0; + +CCompositeGeometryManager::CCompositeGeometryManager() +{ +	m_iMaxSize = 0; + +	if (s_params) { +		m_iMaxSize = s_params->memory; +		m_GPUIndices = s_params->GPUIndices; +	} +} + +  // JOB: -//   +//  // VolumePart  // ProjectionPart  // FP-or-BP @@ -76,7 +96,6 @@ namespace astra {  //   (First approach: 0.5/0.5) -  bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split)  {  	split.clear(); @@ -305,37 +324,37 @@ CCompositeGeometryManager::CPart* CCompositeGeometryManager::CVolumePart::reduce  static size_t ceildiv(size_t a, size_t b) { -    return (a + b - 1) / b; +	return (a + b - 1) / b;  }  static size_t computeVerticalSplit(size_t maxBlock, int div, size_t sliceCount)  { -    size_t blockSize = maxBlock; -    size_t blockCount = ceildiv(sliceCount, blockSize); +	size_t blockSize = maxBlock; +	size_t blockCount = ceildiv(sliceCount, blockSize); -    // Increase number of blocks to be divisible by div -    size_t divCount = div * ceildiv(blockCount, div); +	// Increase number of blocks to be divisible by div +	size_t divCount = div * ceildiv(blockCount, div); -    // If divCount is above sqrt(number of slices), then -    // we can't guarantee divisibility by div, but let's try anyway -    if (ceildiv(sliceCount, ceildiv(sliceCount, divCount)) % div == 0) { -        blockCount = divCount; -    } else { -        // If divisibility isn't achievable, we may want to optimize -        // differently. -        // TODO: Figure out how to model and optimize this. -    } +	// If divCount is above sqrt(number of slices), then +	// we can't guarantee divisibility by div, but let's try anyway +	if (ceildiv(sliceCount, ceildiv(sliceCount, divCount)) % div == 0) { +		blockCount = divCount; +	} else { +		// If divisibility isn't achievable, we may want to optimize +		// differently. +		// TODO: Figure out how to model and optimize this. +	} -    // Final adjustment to make blocks more evenly sized -    // (This can't make the blocks larger) -    blockSize = ceildiv(sliceCount, blockCount);  +	// Final adjustment to make blocks more evenly sized +	// (This can't make the blocks larger) +	blockSize = ceildiv(sliceCount, blockCount); -    ASTRA_DEBUG("%ld %ld -> %ld * %ld", sliceCount, maxBlock, blockCount, blockSize); +	ASTRA_DEBUG("%ld %ld -> %ld * %ld", sliceCount, maxBlock, blockCount, blockSize); -    assert(blockSize <= maxBlock); -    assert((divCount * divCount > sliceCount) || (blockCount % div) == 0); +	assert(blockSize <= maxBlock); +	assert((divCount * divCount > sliceCount) || (blockCount % div) == 0); -    return blockSize; +	return blockSize;  }  template<class V, class P> @@ -848,6 +867,260 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, const std::vector +static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter) +{ +	CCompositeGeometryManager::CPart* output = iter->first; +	const CCompositeGeometryManager::TJobList& L = iter->second; + +	assert(!L.empty()); + +	bool zero = L.begin()->eMode == CCompositeGeometryManager::SJob::MODE_SET; + +	size_t outx, outy, outz; +	output->getDims(outx, outy, outz); + +	if (L.begin()->eType == CCompositeGeometryManager::SJob::JOB_NOP) { +		// just zero output? +		if (zero) { +			for (size_t z = 0; z < outz; ++z) { +				for (size_t y = 0; y < outy; ++y) { +					float* ptr = output->pData->getData(); +					ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth(); +					ptr += (y + output->subY) * (size_t)output->pData->getWidth(); +					ptr += output->subX; +					memset(ptr, 0, sizeof(float) * outx); +				} +			} +		} +		return true; +	} + + +	astraCUDA3d::SSubDimensions3D dstdims; +	dstdims.nx = output->pData->getWidth(); +	dstdims.pitch = dstdims.nx; +	dstdims.ny = output->pData->getHeight(); +	dstdims.nz = output->pData->getDepth(); +	dstdims.subnx = outx; +	dstdims.subny = outy; +	dstdims.subnz = outz; +	ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz); +	dstdims.subx = output->subX; +	dstdims.suby = output->subY; +	dstdims.subz = output->subZ; +	float *dst = output->pData->getData(); + +	astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO); +	bool ok = outputMem; + +	for (CCompositeGeometryManager::TJobList::const_iterator i = L.begin(); i != L.end(); ++i) { +		const CCompositeGeometryManager::SJob &j = *i; + +		assert(j.pInput); + +		CCudaProjector3D *projector = dynamic_cast<CCudaProjector3D*>(j.pProjector); +		Cuda3DProjectionKernel projKernel = ker3d_default; +		int detectorSuperSampling = 1; +		int voxelSuperSampling = 1; +		if (projector) { +			projKernel = projector->getProjectionKernel(); +			detectorSuperSampling = projector->getDetectorSuperSampling(); +			voxelSuperSampling = projector->getVoxelSuperSampling(); +		} + +		size_t inx, iny, inz; +		j.pInput->getDims(inx, iny, inz); +		astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO); + +		astraCUDA3d::SSubDimensions3D srcdims; +		srcdims.nx = j.pInput->pData->getWidth(); +		srcdims.pitch = srcdims.nx; +		srcdims.ny = j.pInput->pData->getHeight(); +		srcdims.nz = j.pInput->pData->getDepth(); +		srcdims.subnx = inx; +		srcdims.subny = iny; +		srcdims.subnz = inz; +		srcdims.subx = j.pInput->subX; +		srcdims.suby = j.pInput->subY; +		srcdims.subz = j.pInput->subZ; +		const float *src = j.pInput->pData->getDataConst(); + +		ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims); +		if (!ok) ASTRA_ERROR("Error copying input data to GPU"); + +		if (j.eType == CCompositeGeometryManager::SJob::JOB_FP) { +			assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pInput.get())); +			assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pOutput.get())); + +			ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP"); + +			ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel); +			if (!ok) ASTRA_ERROR("Error performing sub-FP"); +			ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done"); +		} else if (j.eType == CCompositeGeometryManager::SJob::JOB_BP) { +			assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get())); +			assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get())); + +			ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP"); + +			ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling); +			if (!ok) ASTRA_ERROR("Error performing sub-BP"); +			ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done"); +		} else { +			assert(false); +		} + +		ok = astraCUDA3d::freeGPUMemory(inputMem); +		if (!ok) ASTRA_ERROR("Error freeing GPU memory"); + +	} + +	ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims); +	if (!ok) ASTRA_ERROR("Error copying output data from GPU"); +	 +	ok = astraCUDA3d::freeGPUMemory(outputMem); +	if (!ok) ASTRA_ERROR("Error freeing GPU memory"); + +	return true; +} + + +class WorkQueue { +public: +	WorkQueue(CCompositeGeometryManager::TJobSet &_jobs) : m_jobs(_jobs) { +#ifdef USE_PTHREADS +		pthread_mutex_init(&m_mutex, 0); +#endif +		m_iter = m_jobs.begin(); +	} +	bool receive(CCompositeGeometryManager::TJobSet::const_iterator &i) { +		lock(); + +		if (m_iter == m_jobs.end()) { +			unlock(); +			return false; +		} + +		i = m_iter++; + +		unlock(); + +		return true;	 +	} +#ifdef USE_PTHREADS +	void lock() { +		// TODO: check mutex op return values +		pthread_mutex_lock(&m_mutex); +	} +	void unlock() { +		// TODO: check mutex op return values +		pthread_mutex_unlock(&m_mutex); +	} +#else +	void lock() { +		m_mutex.lock(); +	} +	void unlock() { +		m_mutex.unlock(); +	} +#endif + +private: +	CCompositeGeometryManager::TJobSet &m_jobs; +	CCompositeGeometryManager::TJobSet::const_iterator m_iter; +#ifdef USE_PTHREADS +	pthread_mutex_t m_mutex; +#else +	boost::mutex m_mutex; +#endif +}; + +struct WorkThreadInfo { +	WorkQueue* m_queue; +	unsigned int m_iGPU; +}; + +#ifndef USE_PTHREADS + +void runEntries_boost(WorkThreadInfo* info) +{ +	ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU); +	CCompositeGeometryManager::TJobSet::const_iterator i; +	while (info->m_queue->receive(i)) { +		ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU); +		astraCUDA3d::setGPUIndex(info->m_iGPU); +		boost::this_thread::interruption_point(); +		doJob(i); +		boost::this_thread::interruption_point(); +	} +	ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU); +} + + +#else + +void* runEntries_pthreads(void* data) { +	WorkThreadInfo* info = (WorkThreadInfo*)data; + +	ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU); + +	CCompositeGeometryManager::TJobSet::const_iterator i; + +	while (info->m_queue->receive(i)) { +		ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU); +		astraCUDA3d::setGPUIndex(info->m_iGPU); +		pthread_testcancel(); +		doJob(i); +		pthread_testcancel(); +	} +	ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU); + +	return 0; +} + +#endif + + +void runWorkQueue(WorkQueue &queue, const std::vector<int> & iGPUIndices) { +	int iThreadCount = iGPUIndices.size(); + +	std::vector<WorkThreadInfo> infos; +#ifdef USE_PTHREADS +	std::vector<pthread_t> threads; +#else +	std::vector<boost::thread*> threads; +#endif +	infos.resize(iThreadCount); +	threads.resize(iThreadCount); + +	for (int i = 0; i < iThreadCount; ++i) { +		infos[i].m_queue = &queue; +		infos[i].m_iGPU = iGPUIndices[i]; +#ifdef USE_PTHREADS +		pthread_create(&threads[i], 0, runEntries_pthreads, (void*)&infos[i]); +#else +		threads[i] = new boost::thread(runEntries_boost, &infos[i]); +#endif +	} + +	// Wait for them to finish +	for (int i = 0; i < iThreadCount; ++i) { +#ifdef USE_PTHREADS +		pthread_join(threads[i], 0); +#else +		threads[i]->join(); +		delete threads[i]; +		threads[i] = 0; +#endif +	} +} + + +void CCompositeGeometryManager::setGPUIndices(const std::vector<int>& GPUIndices) +{ +	m_GPUIndices = GPUIndices; +} +  bool CCompositeGeometryManager::doJobs(TJobList &jobs)  {  	ASTRA_DEBUG("CCompositeGeometryManager::doJobs"); @@ -859,140 +1132,53 @@ bool CCompositeGeometryManager::doJobs(TJobList &jobs)  		jobset[i->pOutput.get()].push_back(*i);  	} -	size_t maxSize = astraCUDA3d::availableGPUMemory(); +	size_t maxSize = m_iMaxSize;  	if (maxSize == 0) { -		ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB."); -		maxSize = 1024 * 1024 * 1024; +		// Get memory from first GPU. Not optimal... +		if (!m_GPUIndices.empty()) +			astraCUDA3d::setGPUIndex(m_GPUIndices[0]); +		maxSize = astraCUDA3d::availableGPUMemory(); +		if (maxSize == 0) { +			ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB."); +			maxSize = 1024 * 1024 * 1024; +		} else { +			ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize); +		}  	} else { -		ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize); +		ASTRA_DEBUG("Set to %lu bytes of GPU memory", maxSize);  	}  	maxSize = (maxSize * 9) / 10;  	maxSize /= sizeof(float);  	int div = 1; - -	// TODO: Multi-GPU support +	if (!m_GPUIndices.empty()) +		div = m_GPUIndices.size();  	// Split jobs to fit  	TJobSet split;  	splitJobs(jobset, maxSize, div, split);  	jobset.clear(); -	// Run jobs -	 -	for (TJobSet::iterator iter = split.begin(); iter != split.end(); ++iter) { - -		CPart* output = iter->first; -		TJobList& L = iter->second; - -		assert(!L.empty()); +	if (m_GPUIndices.size() <= 1) { -		bool zero = L.begin()->eMode == SJob::MODE_SET; +		// Run jobs +		ASTRA_DEBUG("Running single-threaded"); -		size_t outx, outy, outz; -		output->getDims(outx, outy, outz); +		if (!m_GPUIndices.empty()) +			astraCUDA3d::setGPUIndex(m_GPUIndices[0]); -		if (L.begin()->eType == SJob::JOB_NOP) { -			// just zero output? -			if (zero) { -				for (size_t z = 0; z < outz; ++z) { -					for (size_t y = 0; y < outy; ++y) { -						float* ptr = output->pData->getData(); -						ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth(); -						ptr += (y + output->subY) * (size_t)output->pData->getWidth(); -						ptr += output->subX; -						memset(ptr, 0, sizeof(float) * outx); -					} -				} -			} -			continue; +		for (TJobSet::const_iterator iter = split.begin(); iter != split.end(); ++iter) { +			doJob(iter);  		} +	} else { -		astraCUDA3d::SSubDimensions3D dstdims; -		dstdims.nx = output->pData->getWidth(); -		dstdims.pitch = dstdims.nx; -		dstdims.ny = output->pData->getHeight(); -		dstdims.nz = output->pData->getDepth(); -		dstdims.subnx = outx; -		dstdims.subny = outy; -		dstdims.subnz = outz; -		ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz); -		dstdims.subx = output->subX; -		dstdims.suby = output->subY; -		dstdims.subz = output->subZ; -		float *dst = output->pData->getData(); - -		astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO); -		bool ok = outputMem; - -		for (TJobList::iterator i = L.begin(); i != L.end(); ++i) { -			SJob &j = *i; - -			assert(j.pInput); - -			CCudaProjector3D *projector = dynamic_cast<CCudaProjector3D*>(j.pProjector); -			Cuda3DProjectionKernel projKernel = ker3d_default; -			int detectorSuperSampling = 1; -			int voxelSuperSampling = 1; -			if (projector) { -				projKernel = projector->getProjectionKernel(); -				detectorSuperSampling = projector->getDetectorSuperSampling(); -				voxelSuperSampling = projector->getVoxelSuperSampling(); -			} - -			size_t inx, iny, inz; -			j.pInput->getDims(inx, iny, inz); -			astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO); - -			astraCUDA3d::SSubDimensions3D srcdims; -			srcdims.nx = j.pInput->pData->getWidth(); -			srcdims.pitch = srcdims.nx; -			srcdims.ny = j.pInput->pData->getHeight(); -			srcdims.nz = j.pInput->pData->getDepth(); -			srcdims.subnx = inx; -			srcdims.subny = iny; -			srcdims.subnz = inz; -			srcdims.subx = j.pInput->subX; -			srcdims.suby = j.pInput->subY; -			srcdims.subz = j.pInput->subZ; -			const float *src = j.pInput->pData->getDataConst(); - -			ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims); -			if (!ok) ASTRA_ERROR("Error copying input data to GPU"); - -			if (j.eType == SJob::JOB_FP) { -				assert(dynamic_cast<CVolumePart*>(j.pInput.get())); -				assert(dynamic_cast<CProjectionPart*>(j.pOutput.get())); - -				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP"); - -				ok = astraCUDA3d::FP(((CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel); -				if (!ok) ASTRA_ERROR("Error performing sub-FP"); -				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done"); -			} else if (j.eType == SJob::JOB_BP) { -				assert(dynamic_cast<CVolumePart*>(j.pOutput.get())); -				assert(dynamic_cast<CProjectionPart*>(j.pInput.get())); - -				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP"); - -				ok = astraCUDA3d::BP(((CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling); -				if (!ok) ASTRA_ERROR("Error performing sub-BP"); -				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done"); -			} else { -				assert(false); -			} +		ASTRA_DEBUG("Running multi-threaded"); -			ok = astraCUDA3d::freeGPUMemory(inputMem); -			if (!ok) ASTRA_ERROR("Error freeing GPU memory"); +		WorkQueue wq(split); -		} +		runWorkQueue(wq, m_GPUIndices); -		ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims); -		if (!ok) ASTRA_ERROR("Error copying output data from GPU"); -		 -		ok = astraCUDA3d::freeGPUMemory(outputMem); -		if (!ok) ASTRA_ERROR("Error freeing GPU memory");  	}  	return true; @@ -1000,6 +1186,26 @@ bool CCompositeGeometryManager::doJobs(TJobList &jobs) + +//static +void CCompositeGeometryManager::setGlobalGPUParams(const SGPUParams& params) +{ +	delete s_params; + +	s_params = new SGPUParams; +	*s_params = params; + +	ASTRA_DEBUG("CompositeGeometryManager: Setting global GPU params:"); +	std::ostringstream s; +	s << "GPU indices:"; +	for (unsigned int i = 0; i < params.GPUIndices.size(); ++i) +		s << " " << params.GPUIndices[i]; +	std::string ss = s.str(); +	ASTRA_DEBUG(ss.c_str()); +	ASTRA_DEBUG("Memory: %llu", params.memory); +} + +  }  #endif diff --git a/src/ConeProjectionGeometry3D.cpp b/src/ConeProjectionGeometry3D.cpp index 99b4bf4..96b04fb 100644 --- a/src/ConeProjectionGeometry3D.cpp +++ b/src/ConeProjectionGeometry3D.cpp @@ -256,9 +256,6 @@ void CConeProjectionGeometry3D::projectPoint(double fX, double fY, double fZ,  	// Scale fS to detector plane  	fU = detectorOffsetXToColIndexFloat( (fS * (m_fOriginSourceDistance + m_fOriginDetectorDistance)) / fD ); - -	ASTRA_DEBUG("alpha: %f, D: %f, V: %f, S: %f, U: %f", alpha, fD, fV, fS, fU); -  }  void CConeProjectionGeometry3D::backprojectPointX(int iAngleIndex, double fU, double fV, | 
