diff options
Diffstat (limited to 'cuda')
-rw-r--r-- | cuda/2d/algo.cu | 4 | ||||
-rw-r--r-- | cuda/2d/arith.cu | 18 | ||||
-rw-r--r-- | cuda/2d/astra.cu | 4 | ||||
-rw-r--r-- | cuda/2d/cgls.cu | 4 | ||||
-rw-r--r-- | cuda/2d/darthelper.cu | 4 | ||||
-rw-r--r-- | cuda/2d/em.cu | 4 | ||||
-rw-r--r-- | cuda/2d/fan_bp.cu | 19 | ||||
-rw-r--r-- | cuda/2d/fan_fp.cu | 15 | ||||
-rw-r--r-- | cuda/2d/fbp.cu | 4 | ||||
-rw-r--r-- | cuda/2d/fft.cu | 128 | ||||
-rw-r--r-- | cuda/2d/par_bp.cu | 14 | ||||
-rw-r--r-- | cuda/2d/par_fp.cu | 23 | ||||
-rw-r--r-- | cuda/2d/sart.cu | 6 | ||||
-rw-r--r-- | cuda/2d/sirt.cu | 4 | ||||
-rw-r--r-- | cuda/2d/util.cu | 73 | ||||
-rw-r--r-- | cuda/3d/algo3d.cu | 4 | ||||
-rw-r--r-- | cuda/3d/arith3d.cu | 40 | ||||
-rw-r--r-- | cuda/3d/astra3d.cu | 4 | ||||
-rw-r--r-- | cuda/3d/cgls3d.cu | 4 | ||||
-rw-r--r-- | cuda/3d/cone_bp.cu | 31 | ||||
-rw-r--r-- | cuda/3d/cone_fp.cu | 25 | ||||
-rw-r--r-- | cuda/3d/darthelper3d.cu | 4 | ||||
-rw-r--r-- | cuda/3d/fdk.cu | 10 | ||||
-rw-r--r-- | cuda/3d/mem3d.cu | 42 | ||||
-rw-r--r-- | cuda/3d/par3d_bp.cu | 22 | ||||
-rw-r--r-- | cuda/3d/par3d_fp.cu | 41 | ||||
-rw-r--r-- | cuda/3d/sirt3d.cu | 4 | ||||
-rw-r--r-- | cuda/3d/util3d.cu | 107 |
28 files changed, 288 insertions, 374 deletions
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu index be15b25..c211729 100644 --- a/cuda/2d/algo.cu +++ b/cuda/2d/algo.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ diff --git a/cuda/2d/arith.cu b/cuda/2d/arith.cu index 62eb8c2..45622d0 100644 --- a/cuda/2d/arith.cu +++ b/cuda/2d/arith.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ @@ -451,7 +451,7 @@ void processData(float* pfOut, unsigned int pitch, unsigned int width, unsigned devtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -462,7 +462,7 @@ void processData(float* pfOut, float fParam, unsigned int pitch, unsigned int wi devFtoD<op, 32><<<gridSize, blockSize>>>(pfOut, fParam, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -473,7 +473,7 @@ void processData(float* pfOut1, float* pfOut2, float fParam1, float fParam2, uns devFFtoDD<op, 32><<<gridSize, blockSize>>>(pfOut1, pfOut2, fParam1, fParam2, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } @@ -485,7 +485,7 @@ void processData(float* pfOut, const float* pfIn, unsigned int pitch, unsigned i devDtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -496,7 +496,7 @@ void processData(float* pfOut, const float* pfIn, float fParam, unsigned int pit devDFtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn, fParam, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -507,7 +507,7 @@ void processData(float* pfOut, const float* pfIn1, const float* pfIn2, float fPa devDDFtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn1, pfIn2, fParam, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -518,7 +518,7 @@ void processData(float* pfOut, const float* pfIn1, const float* pfIn2, unsigned devDDtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn1, pfIn2, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu index 7ff1c95..4752b5f 100644 --- a/cuda/2d/astra.cu +++ b/cuda/2d/astra.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu index e7238b9..9c2df68 100644 --- a/cuda/2d/cgls.cu +++ b/cuda/2d/cgls.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ diff --git a/cuda/2d/darthelper.cu b/cuda/2d/darthelper.cu index b466840..48ec4b0 100644 --- a/cuda/2d/darthelper.cu +++ b/cuda/2d/darthelper.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu index df140ec..203032c 100644 --- a/cuda/2d/em.cu +++ b/cuda/2d/em.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu index 7bba302..2068d03 100644 --- a/cuda/2d/fan_bp.cu +++ b/cuda/2d/fan_bp.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ @@ -322,13 +322,12 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch, else devFanBP<false><<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale); } - cudaThreadSynchronize(); - cudaTextForceKernelsCompletion(); + ok = checkCuda(cudaStreamSynchronize(stream), "FanBP"); cudaStreamDestroy(stream); - return true; + return ok; } bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch, @@ -354,13 +353,12 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch, for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { devFanBP<true><<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale); } - cudaThreadSynchronize(); - cudaTextForceKernelsCompletion(); + ok = checkCuda(cudaStreamSynchronize(stream), "FanBP_FBPWeighted"); cudaStreamDestroy(stream); - return true; + return ok; } // D_projData is a pointer to one padded sinogram line @@ -382,11 +380,8 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch, (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize); devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims, fOutputScale); - cudaThreadSynchronize(); - cudaTextForceKernelsCompletion(); - - return true; + return checkCuda(cudaThreadSynchronize(), "FanBP_SART"); } bool FanBP(float* D_volumeData, unsigned int volumePitch, diff --git a/cuda/2d/fan_fp.cu b/cuda/2d/fan_fp.cu index 60c02f8..342ca4c 100644 --- a/cuda/2d/fan_fp.cu +++ b/cuda/2d/fan_fp.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ @@ -268,16 +268,17 @@ bool FanFP_internal(float* D_volumeData, unsigned int volumePitch, for (unsigned int i = 0; i < dims.iVolHeight; i += g_blockSlices) FanFPvertical<<<dimGrid, dimBlock, 0, stream2>>>(D_projData, projPitch, i, blockStart, blockEnd, dims, outputScale); - cudaStreamDestroy(stream1); - cudaStreamDestroy(stream2); + bool ok = true; - cudaThreadSynchronize(); + ok &= checkCuda(cudaStreamSynchronize(stream1), "fan_fp hor"); + cudaStreamDestroy(stream1); - cudaTextForceKernelsCompletion(); + ok &= checkCuda(cudaStreamSynchronize(stream2), "fan_fp ver"); + cudaStreamDestroy(stream2); cudaFreeArray(D_dataArray); - return true; + return ok; } bool FanFP(float* D_volumeData, unsigned int volumePitch, diff --git a/cuda/2d/fbp.cu b/cuda/2d/fbp.cu index 4fc3983..7acbf6d 100644 --- a/cuda/2d/fbp.cu +++ b/cuda/2d/fbp.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index 8361ad2..08acfd4 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ @@ -40,33 +40,18 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. using namespace astra; -// TODO: evaluate what we want to do in these situations: - -#define CHECK_ERROR(errorMessage) do { \ - cudaError_t err = cudaThreadSynchronize(); \ - if( cudaSuccess != err) { \ - ASTRA_ERROR("Cuda error %s : %s", \ - errorMessage,cudaGetErrorString( err)); \ - exit(EXIT_FAILURE); \ - } } while (0) - -#define SAFE_CALL( call) do { \ - cudaError err = call; \ - if( cudaSuccess != err) { \ - ASTRA_ERROR("Cuda error: %s ", \ - cudaGetErrorString( err)); \ - exit(EXIT_FAILURE); \ - } \ - err = cudaThreadSynchronize(); \ - if( cudaSuccess != err) { \ - ASTRA_ERROR("Cuda error: %s : ", \ - cudaGetErrorString( err)); \ - exit(EXIT_FAILURE); \ - } } while (0) - - namespace astraCUDA { +bool checkCufft(cufftResult err, const char *msg) +{ + if (err != CUFFT_SUCCESS) { + ASTRA_ERROR("%s: CUFFT error %d.", msg, err); + return false; + } else { + return true; + } +} + __global__ static void applyFilter_kernel(int _iProjectionCount, int _iFreqBinCount, cufftComplex * _pSinogram, @@ -115,7 +100,8 @@ static void rescaleInverseFourier(int _iProjectionCount, int _iDetectorCount, rescaleInverseFourier_kernel<<< iBlockCount, iBlockSize >>>(_iProjectionCount, _iDetectorCount, _pfInFourierOutput); - CHECK_ERROR("rescaleInverseFourier_kernel failed"); + + checkCuda(cudaThreadSynchronize(), "rescaleInverseFourier"); } void applyFilter(int _iProjectionCount, int _iFreqBinCount, @@ -128,7 +114,8 @@ void applyFilter(int _iProjectionCount, int _iFreqBinCount, applyFilter_kernel<<< iBlockCount, iBlockSize >>>(_iProjectionCount, _iFreqBinCount, _pSinogram, _pFilter); - CHECK_ERROR("applyFilter_kernel failed"); + + checkCuda(cudaThreadSynchronize(), "applyFilter"); } static bool invokeCudaFFT(int _iProjectionCount, int _iDetectorCount, @@ -136,24 +123,22 @@ static bool invokeCudaFFT(int _iProjectionCount, int _iDetectorCount, cufftComplex * _pDevTargetComplex) { cufftHandle plan; - cufftResult result; - result = cufftPlan1d(&plan, _iDetectorCount, CUFFT_R2C, _iProjectionCount); - if(result != CUFFT_SUCCESS) - { - ASTRA_ERROR("Failed to plan 1d r2c fft"); + if (!checkCufft(cufftPlan1d(&plan, _iDetectorCount, CUFFT_R2C, _iProjectionCount), "invokeCudaFFT plan")) { return false; } - result = cufftExecR2C(plan, (cufftReal *)_pfDevSource, _pDevTargetComplex); - cufftDestroy(plan); + if (!checkCufft(cufftExecR2C(plan, (cufftReal *)_pfDevSource, _pDevTargetComplex), "invokeCudaFFT exec")) { + cufftDestroy(plan); + return false; + } - if(result != CUFFT_SUCCESS) - { - ASTRA_ERROR("Failed to exec 1d r2c fft"); + if (!checkCuda(cudaDeviceSynchronize(), "invokeCudaFFT sync")) { + cufftDestroy(plan); return false; } + cufftDestroy(plan); return true; } @@ -162,26 +147,25 @@ static bool invokeCudaIFFT(int _iProjectionCount, int _iDetectorCount, float * _pfDevTarget) { cufftHandle plan; - cufftResult result; - result = cufftPlan1d(&plan, _iDetectorCount, CUFFT_C2R, _iProjectionCount); - if(result != CUFFT_SUCCESS) - { - ASTRA_ERROR("Failed to plan 1d c2r fft"); + if (!checkCufft(cufftPlan1d(&plan, _iDetectorCount, CUFFT_C2R, _iProjectionCount), "invokeCudaIFFT plan")) { return false; } - // todo: why do we have to get rid of the const qualifier? - result = cufftExecC2R(plan, (cufftComplex *)_pDevSourceComplex, - (cufftReal *)_pfDevTarget); - cufftDestroy(plan); - - if(result != CUFFT_SUCCESS) + // Getting rid of the const qualifier is due to cufft API issue? + if (!checkCufft(cufftExecC2R(plan, (cufftComplex *)_pDevSourceComplex, + (cufftReal *)_pfDevTarget), "invokeCudaIFFT exec")) { - ASTRA_ERROR("Failed to exec 1d c2r fft"); + cufftDestroy(plan); return false; } + if (!checkCuda(cudaDeviceSynchronize(), "invokeCudaIFFT sync")) { + cufftDestroy(plan); + return false; + } + + cufftDestroy(plan); return true; } @@ -189,14 +173,12 @@ bool allocateComplexOnDevice(int _iProjectionCount, int _iDetectorCount, cufftComplex ** _ppDevComplex) { size_t bufferSize = sizeof(cufftComplex) * _iProjectionCount * _iDetectorCount; - SAFE_CALL(cudaMalloc((void **)_ppDevComplex, bufferSize)); - return true; + return checkCuda(cudaMalloc((void **)_ppDevComplex, bufferSize), "fft allocateComplexOnDevice"); } bool freeComplexOnDevice(cufftComplex * _pDevComplex) { - SAFE_CALL(cudaFree(_pDevComplex)); - return true; + return checkCuda(cudaFree(_pDevComplex), "fft freeComplexOnDevice"); } bool uploadComplexArrayToDevice(int _iProjectionCount, int _iDetectorCount, @@ -204,9 +186,7 @@ bool uploadComplexArrayToDevice(int _iProjectionCount, int _iDetectorCount, cufftComplex * _pDevComplexTarget) { size_t memSize = sizeof(cufftComplex) * _iProjectionCount * _iDetectorCount; - SAFE_CALL(cudaMemcpy(_pDevComplexTarget, _pHostComplexSource, memSize, cudaMemcpyHostToDevice)); - - return true; + return checkCuda(cudaMemcpy(_pDevComplexTarget, _pHostComplexSource, memSize, cudaMemcpyHostToDevice), "fft uploadComplexArrayToDevice"); } bool runCudaFFT(int _iProjectionCount, const float * _pfDevRealSource, @@ -217,25 +197,30 @@ bool runCudaFFT(int _iProjectionCount, const float * _pfDevRealSource, float * pfDevRealFFTSource = NULL; size_t bufferMemSize = sizeof(float) * _iProjectionCount * _iFFTRealDetectorCount; - SAFE_CALL(cudaMalloc((void **)&pfDevRealFFTSource, bufferMemSize)); - SAFE_CALL(cudaMemset(pfDevRealFFTSource, 0, bufferMemSize)); + if (!checkCuda(cudaMalloc((void **)&pfDevRealFFTSource, bufferMemSize), "runCudaFFT malloc")) + return false; + if (!checkCuda(cudaMemset(pfDevRealFFTSource, 0, bufferMemSize), "runCudaFFT memset")) { + cudaFree(pfDevRealFFTSource); + return false; + } for(int iProjectionIndex = 0; iProjectionIndex < _iProjectionCount; iProjectionIndex++) { const float * pfSourceLocation = _pfDevRealSource + iProjectionIndex * _iSourcePitch; float * pfTargetLocation = pfDevRealFFTSource + iProjectionIndex * _iFFTRealDetectorCount; - SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice)); + if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaFFT memcpy")) { + cudaFree(pfDevRealFFTSource); + return false; + } } bool bResult = invokeCudaFFT(_iProjectionCount, _iFFTRealDetectorCount, pfDevRealFFTSource, _pDevTargetComplex); if(!bResult) - { return false; - } - SAFE_CALL(cudaFree(pfDevRealFFTSource)); + cudaFree(pfDevRealFFTSource); return true; } @@ -248,7 +233,8 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex, float * pfDevRealFFTTarget = NULL; size_t bufferMemSize = sizeof(float) * _iProjectionCount * _iFFTRealDetectorCount; - SAFE_CALL(cudaMalloc((void **)&pfDevRealFFTTarget, bufferMemSize)); + if (!checkCuda(cudaMalloc((void **)&pfDevRealFFTTarget, bufferMemSize), "runCudaIFFT malloc")) + return false; bool bResult = invokeCudaIFFT(_iProjectionCount, _iFFTRealDetectorCount, _pDevSourceComplex, pfDevRealFFTTarget); @@ -260,17 +246,23 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex, rescaleInverseFourier(_iProjectionCount, _iFFTRealDetectorCount, pfDevRealFFTTarget); - SAFE_CALL(cudaMemset(_pfRealTarget, 0, sizeof(float) * _iProjectionCount * _iTargetPitch)); + if (!checkCuda(cudaMemset(_pfRealTarget, 0, sizeof(float) * _iProjectionCount * _iTargetPitch), "runCudaIFFT memset")) { + cudaFree(pfDevRealFFTTarget); + return false; + } for(int iProjectionIndex = 0; iProjectionIndex < _iProjectionCount; iProjectionIndex++) { const float * pfSourceLocation = pfDevRealFFTTarget + iProjectionIndex * _iFFTRealDetectorCount; float* pfTargetLocation = _pfRealTarget + iProjectionIndex * _iTargetPitch; - SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice)); + if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaIFFT memcpy")) { + cudaFree(pfDevRealFFTTarget); + return false; + } } - SAFE_CALL(cudaFree(pfDevRealFFTTarget)); + cudaFree(pfDevRealFFTTarget); return true; } diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu index f080abb..d7c3ab0 100644 --- a/cuda/2d/par_bp.cu +++ b/cuda/2d/par_bp.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ @@ -231,13 +231,12 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch, else devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale); } - cudaThreadSynchronize(); - cudaTextForceKernelsCompletion(); + bool ok = checkCuda(cudaStreamSynchronize(stream), "par_bp"); cudaStreamDestroy(stream); - return true; + return ok; } bool BP(float* D_volumeData, unsigned int volumePitch, @@ -284,11 +283,8 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch, (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize); devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, angle_offset, angle_scaled_sin, angle_scaled_cos, dims, fOutputScale); - cudaThreadSynchronize(); - - cudaTextForceKernelsCompletion(); - return true; + return checkCuda(cudaThreadSynchronize(), "BP_SART"); } diff --git a/cuda/2d/par_fp.cu b/cuda/2d/par_fp.cu index aac6cc3..e947428 100644 --- a/cuda/2d/par_fp.cu +++ b/cuda/2d/par_fp.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ @@ -305,8 +305,8 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch, dim3 dimGrid((blockEnd-blockStart+g_anglesPerBlock-1)/g_anglesPerBlock, (dims.iProjDets+g_detBlockSize-1)/g_detBlockSize); // angle blocks, detector blocks - // TODO: check if we can't immediately - // destroy the stream after use + // TODO: consider limiting number of handle (chaotic) geoms + // with many alternating directions cudaStream_t stream; cudaStreamCreate(&stream); streams.push_back(stream); @@ -323,19 +323,16 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch, } } - for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter) - cudaStreamDestroy(*iter); - - streams.clear(); - - cudaThreadSynchronize(); + bool ok = true; - cudaTextForceKernelsCompletion(); + for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter) { + ok &= checkCuda(cudaStreamSynchronize(*iter), "par_fp"); + cudaStreamDestroy(*iter); + } cudaFreeArray(D_dataArray); - - return true; + return ok; } bool FP_simple(float* D_volumeData, unsigned int volumePitch, diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu index 12ad6df..89d58c2 100644 --- a/cuda/2d/sart.cu +++ b/cuda/2d/sart.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ @@ -54,7 +54,7 @@ void MUL_SART(float* pfOut, const float* pfIn, unsigned int pitch, unsigned int devMUL_SART<<<gridSize, blockSize>>>(pfOut, pfIn, pitch, width); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), "MUL_SART"); } diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu index 2c5fdc9..b251734 100644 --- a/cuda/2d/sirt.cu +++ b/cuda/2d/sirt.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ diff --git a/cuda/2d/util.cu b/cuda/2d/util.cu index 533b86c..ac360f0 100644 --- a/cuda/2d/util.cu +++ b/cuda/2d/util.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ @@ -40,12 +40,8 @@ bool copyVolumeToDevice(const float* in_data, unsigned int in_pitch, { size_t width = dims.iVolWidth; size_t height = dims.iVolHeight; - // TODO: memory order - cudaError_t err; - err = cudaMemcpy2D(outD_data, sizeof(float)*out_pitch, in_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyHostToDevice); - ASTRA_CUDA_ASSERT(err); - assert(err == cudaSuccess); - return true; + + return checkCuda(cudaMemcpy2D(outD_data, sizeof(float)*out_pitch, in_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyHostToDevice), "copyVolumeToDevice"); } bool copyVolumeFromDevice(float* out_data, unsigned int out_pitch, @@ -54,10 +50,8 @@ bool copyVolumeFromDevice(float* out_data, unsigned int out_pitch, { size_t width = dims.iVolWidth; size_t height = dims.iVolHeight; - // TODO: memory order - cudaError_t err = cudaMemcpy2D(out_data, sizeof(float)*out_pitch, inD_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyDeviceToHost); - ASTRA_CUDA_ASSERT(err); - return true; + + return checkCuda(cudaMemcpy2D(out_data, sizeof(float)*out_pitch, inD_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyDeviceToHost), "copyVolumeFromDevice"); } @@ -67,10 +61,8 @@ bool copySinogramFromDevice(float* out_data, unsigned int out_pitch, { size_t width = dims.iProjDets; size_t height = dims.iProjAngles; - // TODO: memory order - cudaError_t err = cudaMemcpy2D(out_data, sizeof(float)*out_pitch, inD_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyDeviceToHost); - ASTRA_CUDA_ASSERT(err); - return true; + + return checkCuda(cudaMemcpy2D(out_data, sizeof(float)*out_pitch, inD_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyDeviceToHost), "copySinogramFromDevice"); } bool copySinogramToDevice(const float* in_data, unsigned int in_pitch, @@ -79,20 +71,15 @@ bool copySinogramToDevice(const float* in_data, unsigned int in_pitch, { size_t width = dims.iProjDets; size_t height = dims.iProjAngles; - // TODO: memory order - cudaError_t err; - err = cudaMemcpy2D(outD_data, sizeof(float)*out_pitch, in_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyHostToDevice); - ASTRA_CUDA_ASSERT(err); - return true; + + return checkCuda(cudaMemcpy2D(outD_data, sizeof(float)*out_pitch, in_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyHostToDevice), "copySinogramToDevice"); } bool allocateVolume(float*& ptr, unsigned int width, unsigned int height, unsigned int& pitch) { size_t p; - cudaError_t ret = cudaMallocPitch((void**)&ptr, &p, sizeof(float)*width, height); - if (ret != cudaSuccess) { - reportCudaError(ret); + if (!checkCuda(cudaMallocPitch((void**)&ptr, &p, sizeof(float)*width, height), "allocateVolume")) { ASTRA_ERROR("Failed to allocate %dx%d GPU buffer", width, height); return false; } @@ -104,11 +91,9 @@ bool allocateVolume(float*& ptr, unsigned int width, unsigned int height, unsign return true; } -void zeroVolume(float* data, unsigned int pitch, unsigned int width, unsigned int height) +bool zeroVolume(float* data, unsigned int pitch, unsigned int width, unsigned int height) { - cudaError_t err; - err = cudaMemset2D(data, sizeof(float)*pitch, 0, sizeof(float)*width, height); - ASTRA_CUDA_ASSERT(err); + return checkCuda(cudaMemset2D(data, sizeof(float)*pitch, 0, sizeof(float)*width, height), "zeroVolume"); } bool allocateVolumeData(float*& D_ptr, unsigned int& pitch, const SDimensions& dims) @@ -121,14 +106,14 @@ bool allocateProjectionData(float*& D_ptr, unsigned int& pitch, const SDimension return allocateVolume(D_ptr, dims.iProjDets, dims.iProjAngles, pitch); } -void zeroVolumeData(float* D_ptr, unsigned int pitch, const SDimensions& dims) +bool zeroVolumeData(float* D_ptr, unsigned int pitch, const SDimensions& dims) { - zeroVolume(D_ptr, pitch, dims.iVolWidth, dims.iVolHeight); + return zeroVolume(D_ptr, pitch, dims.iVolWidth, dims.iVolHeight); } -void zeroProjectionData(float* D_ptr, unsigned int pitch, const SDimensions& dims) +bool zeroProjectionData(float* D_ptr, unsigned int pitch, const SDimensions& dims) { - zeroVolume(D_ptr, pitch, dims.iProjDets, dims.iProjAngles); + return zeroVolume(D_ptr, pitch, dims.iProjDets, dims.iProjAngles); } void duplicateVolumeData(float* D_dst, float* D_src, unsigned int pitch, const SDimensions& dims) @@ -231,7 +216,7 @@ float dotProduct2D(float* D_data, unsigned int pitch, // Step 1: reduce 2D from image to a single vector, taking sum of squares reduce2D<<< dimGrid2, dimBlock2, shared_mem2>>>(D_data, D_buf, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), "dotProduct2D reduce2D"); // Step 2: reduce 1D: add up elements in vector if (bx * by > 512) @@ -248,31 +233,21 @@ float dotProduct2D(float* D_data, unsigned int pitch, float x; cudaMemcpy(&x, D_res, 4, cudaMemcpyDeviceToHost); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), "dotProduct2D"); cudaFree(D_buf); return x; } - -bool cudaTextForceKernelsCompletion() +bool checkCuda(cudaError_t err, const char *msg) { - cudaError_t returnedCudaError = cudaThreadSynchronize(); - - if(returnedCudaError != cudaSuccess) { - ASTRA_ERROR("Failed to force completion of cuda kernels: %d: %s.", returnedCudaError, cudaGetErrorString(returnedCudaError)); + if (err != cudaSuccess) { + ASTRA_ERROR("%s: CUDA error %d: %s.", msg, err, cudaGetErrorString(err)); return false; + } else { + return true; } - - return true; } -void reportCudaError(cudaError_t err) -{ - if(err != cudaSuccess) - ASTRA_ERROR("CUDA error %d: %s.", err, cudaGetErrorString(err)); -} - - } diff --git a/cuda/3d/algo3d.cu b/cuda/3d/algo3d.cu index 3a83194..4ef2052 100644 --- a/cuda/3d/algo3d.cu +++ b/cuda/3d/algo3d.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ diff --git a/cuda/3d/arith3d.cu b/cuda/3d/arith3d.cu index 2f4054e..b495f22 100644 --- a/cuda/3d/arith3d.cu +++ b/cuda/3d/arith3d.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ @@ -225,7 +225,7 @@ void processVol(CUdeviceptr* out, unsigned int pitch, unsigned int width, unsign devtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -238,7 +238,7 @@ void processVol(CUdeviceptr* out, float fParam, unsigned int pitch, unsigned int devFtoD<op, 32><<<gridSize, blockSize>>>(pfOut, fParam, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -252,7 +252,7 @@ void processVol(CUdeviceptr* out, const CUdeviceptr* in, unsigned int pitch, uns devDtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -266,7 +266,7 @@ void processVol(CUdeviceptr* out, const CUdeviceptr* in, float fParam, unsigned devDFtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn, fParam, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -281,7 +281,7 @@ void processVol(CUdeviceptr* out, const CUdeviceptr* in1, const CUdeviceptr* in2 devDDFtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn1, pfIn2, fParam, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -296,7 +296,7 @@ void processVol(CUdeviceptr* out, const CUdeviceptr* in1, const CUdeviceptr* in2 devDDtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn1, pfIn2, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } @@ -328,7 +328,7 @@ void processVol3D(cudaPitchedPtr& out, const SDimensions3D& dims) pfOut += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -344,7 +344,7 @@ void processVol3D(cudaPitchedPtr& out, float fParam, const SDimensions3D& dims) pfOut += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -362,7 +362,7 @@ void processVol3D(cudaPitchedPtr& out, const cudaPitchedPtr& in, const SDimensio pfIn += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -380,7 +380,7 @@ void processVol3D(cudaPitchedPtr& out, const cudaPitchedPtr& in, float fParam, c pfIn += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -400,7 +400,7 @@ void processVol3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitc pfIn2 += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -420,7 +420,7 @@ void processVol3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitc pfIn2 += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } @@ -448,7 +448,7 @@ void processSino3D(cudaPitchedPtr& out, const SDimensions3D& dims) pfOut += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -464,7 +464,7 @@ void processSino3D(cudaPitchedPtr& out, float fParam, const SDimensions3D& dims) pfOut += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -482,7 +482,7 @@ void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in, const SDimensi pfIn += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -500,7 +500,7 @@ void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in, float fParam, pfIn += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -520,7 +520,7 @@ void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPit pfIn2 += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template<typename op> @@ -540,7 +540,7 @@ void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPit pfIn2 += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 51e76cd..3df52c8 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ diff --git a/cuda/3d/cgls3d.cu b/cuda/3d/cgls3d.cu index 4829574..cbfb422 100644 --- a/cuda/3d/cgls3d.cu +++ b/cuda/3d/cgls3d.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu index 3525eb4..e265304 100644 --- a/cuda/3d/cone_bp.cu +++ b/cuda/3d/cone_bp.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ @@ -41,8 +41,7 @@ static texture3D gT_coneProjTexture; namespace astraCUDA3d { -#define ZSIZE 6 -static const unsigned int g_volBlockZ = ZSIZE; +static const unsigned int g_volBlockZ = 6; static const unsigned int g_anglesPerBlock = 32; static const unsigned int g_volBlockX = 16; @@ -77,7 +76,7 @@ bool bindProjDataTexture(const cudaArray* array) //__launch_bounds__(32*16, 4) -template<bool FDKWEIGHT> +template<bool FDKWEIGHT, unsigned int ZSIZE> __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const astraCUDA3d::SDimensions3D dims, float fOutputScale) @@ -342,15 +341,25 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData, for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) { // printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); - if (params.bFDKWeighting) - dev_cone_BP<true><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); - else if (params.iRaysPerVoxelDim == 1) - dev_cone_BP<false><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); - else + if (params.bFDKWeighting) { + if (dims.iVolZ == 1) { + dev_cone_BP<true, 1><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + } else { + dev_cone_BP<true, g_volBlockZ><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + } + } else if (params.iRaysPerVoxelDim == 1) { + if (dims.iVolZ == 1) { + dev_cone_BP<false, 1><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + } else { + dev_cone_BP<false, g_volBlockZ><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + } + } else dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale); } - cudaTextForceKernelsCompletion(); + // TODO: Consider not synchronizing here, if possible. + if (!checkCuda(cudaThreadSynchronize(), "cone_bp")) + return false; angles = angles + angleCount; // printf("%f\n", toc(t)); diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu index bd607fa..2c3d1f6 100644 --- a/cuda/3d/cone_fp.cu +++ b/cuda/3d/cone_fp.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ @@ -169,6 +169,8 @@ __global__ void cone_FP_t(float* D_projData, unsigned int projPitch, const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; + if (detectorU >= dims.iProjU) + return; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; int endDetectorV = startDetectorV + g_detBlockV; if (endDetectorV > dims.iProjV) @@ -245,6 +247,8 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch, const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; + if (detectorU >= dims.iProjU) + return; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; int endDetectorV = startDetectorV + g_detBlockV; if (endDetectorV > dims.iProjV) @@ -402,8 +406,9 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData, dim3 dimGrid( ((dims.iProjU+g_detBlockU-1)/g_detBlockU)*((dims.iProjV+g_detBlockV-1)/g_detBlockV), (blockEnd-blockStart+g_anglesPerBlock-1)/g_anglesPerBlock); - // TODO: check if we can't immediately - // destroy the stream after use + + // TODO: consider limiting number of handle (chaotic) geoms + // with many alternating directions cudaStream_t stream; cudaStreamCreate(&stream); streams.push_back(stream); @@ -446,16 +451,16 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData, } } - for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter) - cudaStreamDestroy(*iter); - - streams.clear(); + bool ok = true; - cudaTextForceKernelsCompletion(); + for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter) { + ok &= checkCuda(cudaStreamSynchronize(*iter), "cone_fp"); + cudaStreamDestroy(*iter); + } // printf("%f\n", toc(t)); - return true; + return ok; } diff --git a/cuda/3d/darthelper3d.cu b/cuda/3d/darthelper3d.cu index d8ccfa6..c3b93c6 100644 --- a/cuda/3d/darthelper3d.cu +++ b/cuda/3d/darthelper3d.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ diff --git a/cuda/3d/fdk.cu b/cuda/3d/fdk.cu index 456694f..0b8d2ab 100644 --- a/cuda/3d/fdk.cu +++ b/cuda/3d/fdk.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ @@ -176,7 +176,8 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData, devFDK_preweight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fZShift, fDetUSize, fDetVSize, dims); - cudaTextForceKernelsCompletion(); + if (!checkCuda(cudaThreadSynchronize(), "FDK_PreWeight")) + return false; if (bShortScan && dims.iProjAngles > 1) { ASTRA_DEBUG("Doing Parker weighting"); @@ -225,9 +226,10 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData, devFDK_ParkerWeight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fDetUSize, fCentralFanAngle, dims); + if (!checkCuda(cudaThreadSynchronize(), "FDK_PreWeight ParkerWeight")) + return false; } - cudaTextForceKernelsCompletion(); return true; } diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu index 50cfe75..ad2a0f3 100644 --- a/cuda/3d/mem3d.cu +++ b/cuda/3d/mem3d.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ @@ -58,15 +58,13 @@ struct SMemHandle3D_internal int maxBlockDimension() { int dev; - cudaError_t err = cudaGetDevice(&dev); - if (err != cudaSuccess) { + if (!checkCuda(cudaGetDevice(&dev), "maxBlockDimension getDevice")) { ASTRA_WARN("Error querying device"); return 0; } cudaDeviceProp props; - err = cudaGetDeviceProperties(&props, dev); - if (err != cudaSuccess) { + if (!checkCuda(cudaGetDeviceProperties(&props, dev), "maxBlockDimension getDviceProps")) { ASTRA_WARN("Error querying device %d properties", dev); return 0; } @@ -84,10 +82,7 @@ MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Me size_t free = astraCUDA::availableGPUMemory(); - cudaError_t err; - err = cudaMalloc3D(&hnd.ptr, make_cudaExtent(sizeof(float)*x, y, z)); - - if (err != cudaSuccess) { + if (!checkCuda(cudaMalloc3D(&hnd.ptr, make_cudaExtent(sizeof(float)*x, y, z)), "allocateGPUMemory malloc3d")) { return MemHandle3D(); } @@ -98,8 +93,7 @@ MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Me if (zero == INIT_ZERO) { - err = cudaMemset3D(hnd.ptr, 0, make_cudaExtent(sizeof(float)*x, y, z)); - if (err != cudaSuccess) { + if (!checkCuda(cudaMemset3D(hnd.ptr, 0, make_cudaExtent(sizeof(float)*x, y, z)), "allocateGPUMemory memset3d")) { cudaFree(hnd.ptr.ptr); return MemHandle3D(); } @@ -116,23 +110,22 @@ bool zeroGPUMemory(MemHandle3D handle, unsigned int x, unsigned int y, unsigned { SMemHandle3D_internal& hnd = *handle.d.get(); assert(!hnd.arr); - cudaError_t err = cudaMemset3D(hnd.ptr, 0, make_cudaExtent(sizeof(float)*x, y, z)); - return err == cudaSuccess; + return checkCuda(cudaMemset3D(hnd.ptr, 0, make_cudaExtent(sizeof(float)*x, y, z)), "zeroGPUMemory"); } bool freeGPUMemory(MemHandle3D handle) { size_t free = astraCUDA::availableGPUMemory(); - cudaError_t err; + bool ok; if (handle.d->arr) - err = cudaFreeArray(handle.d->arr); + ok = checkCuda(cudaFreeArray(handle.d->arr), "freeGPUMemory array"); else - err = cudaFree(handle.d->ptr.ptr); + ok = checkCuda(cudaFree(handle.d->ptr.ptr), "freeGPUMemory"); size_t free2 = astraCUDA::availableGPUMemory(); ASTRA_DEBUG("Freeing memory. (Pre: %lu, post: %lu)", free, free2); - return err == cudaSuccess; + return ok; } bool copyToGPUMemory(const float *src, MemHandle3D dst, const SSubDimensions3D &pos) @@ -160,9 +153,7 @@ bool copyToGPUMemory(const float *src, MemHandle3D dst, const SSubDimensions3D & p.kind = cudaMemcpyHostToDevice; - cudaError_t err = cudaMemcpy3D(&p); - - return err == cudaSuccess; + return checkCuda(cudaMemcpy3D(&p), "copyToGPUMemory"); } @@ -197,10 +188,7 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos) p.kind = cudaMemcpyDeviceToHost; - cudaError_t err = cudaMemcpy3D(&p); - - return err == cudaSuccess; - + return checkCuda(cudaMemcpy3D(&p), "copyFromGPUMemory"); } @@ -409,9 +397,7 @@ bool copyIntoArray(MemHandle3D handle, MemHandle3D subdata, const SSubDimensions p.kind = cudaMemcpyHostToDevice; - cudaError_t err = cudaMemcpy3D(&p); - - return err == cudaSuccess; + return checkCuda(cudaMemcpy3D(&p), "copyIntoArray"); } diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu index 857a314..1dc75ce 100644 --- a/cuda/3d/par3d_bp.cu +++ b/cuda/3d/par3d_bp.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ @@ -41,8 +41,7 @@ static texture3D gT_par3DProjTexture; namespace astraCUDA3d { -#define ZSIZE 6 -static const unsigned int g_volBlockZ = ZSIZE; +static const unsigned int g_volBlockZ = 6; static const unsigned int g_anglesPerBlock = 32; static const unsigned int g_volBlockX = 16; @@ -77,6 +76,7 @@ static bool bindProjDataTexture(const cudaArray* array) } +template<unsigned int ZSIZE> __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale) { float* volData = (float*)D_volData; @@ -281,13 +281,19 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData, for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) { // printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); - if (params.iRaysPerVoxelDim == 1) - dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); - else + if (params.iRaysPerVoxelDim == 1) { + if (dims.iVolZ == 1) { + dev_par3D_BP<1><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + } else { + dev_par3D_BP<g_volBlockZ><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + } + } else dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale); } - cudaTextForceKernelsCompletion(); + // TODO: Consider not synchronizing here, if possible. + if (!checkCuda(cudaThreadSynchronize(), "cone_bp")) + return false; angles = angles + angleCount; // printf("%f\n", toc(t)); diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index 0a4a5cc..5daddc1 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ @@ -175,6 +175,8 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; + if (detectorU >= dims.iProjU) + return; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; int endDetectorV = startDetectorV + g_detBlockV; if (endDetectorV > dims.iProjV) @@ -251,7 +253,10 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); const float fDistCorr = sc.scale(a1, a2); + const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; + if (detectorU >= dims.iProjU) + return; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; int endDetectorV = startDetectorV + g_detBlockV; if (endDetectorV > dims.iProjV) @@ -359,6 +364,8 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; + if (detectorU >= dims.iProjU) + return; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; int endDetectorV = startDetectorV + g_detBlockV; if (endDetectorV > dims.iProjV) @@ -501,8 +508,8 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, dim3 dimGrid( ((dims.iProjU+g_detBlockU-1)/g_detBlockU)*((dims.iProjV+g_detBlockV-1)/g_detBlockV), (blockEnd-blockStart+g_anglesPerBlock-1)/g_anglesPerBlock); - // TODO: check if we can't immediately - // destroy the stream after use + // TODO: consider limiting number of handle (chaotic) geoms + // with many alternating directions cudaStream_t stream; cudaStreamCreate(&stream); streams.push_back(stream); @@ -545,17 +552,16 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, } } - for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter) - cudaStreamDestroy(*iter); - - streams.clear(); - - cudaTextForceKernelsCompletion(); + bool ok = true; + for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter) { + ok &= checkCuda(cudaStreamSynchronize(*iter), "par3d_fp"); + cudaStreamDestroy(*iter); + } // printf("%f\n", toc(t)); - return true; + return ok; } bool Par3DFP(cudaPitchedPtr D_volumeData, @@ -726,17 +732,16 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, } } - for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter) - cudaStreamDestroy(*iter); - - streams.clear(); - - cudaTextForceKernelsCompletion(); + bool ok = true; + for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter) { + ok &= checkCuda(cudaStreamSynchronize(*iter), "Par3DFP_SumSqW"); + cudaStreamDestroy(*iter); + } // printf("%f\n", toc(t)); - return true; + return ok; } diff --git a/cuda/3d/sirt3d.cu b/cuda/3d/sirt3d.cu index e68bde8..746a96b 100644 --- a/cuda/3d/sirt3d.cu +++ b/cuda/3d/sirt3d.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ diff --git a/cuda/3d/util3d.cu b/cuda/3d/util3d.cu index 41eb9d2..71b5668 100644 --- a/cuda/3d/util3d.cu +++ b/cuda/3d/util3d.cu @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- -Copyright: 2010-2018, imec Vision Lab, University of Antwerp - 2014-2018, CWI, Amsterdam +Copyright: 2010-2021, imec Vision Lab, University of Antwerp + 2014-2021, CWI, Amsterdam Contact: astra@astra-toolbox.com Website: http://www.astra-toolbox.com/ @@ -46,12 +46,9 @@ cudaPitchedPtr allocateVolumeData(const SDimensions3D& dims) cudaPitchedPtr volData; - cudaError err = cudaMalloc3D(&volData, extentV); - if (err != cudaSuccess) { - astraCUDA::reportCudaError(err); + if (!checkCuda(cudaMalloc3D(&volData, extentV), "allocateVolumeData 3D")) { ASTRA_ERROR("Failed to allocate %dx%dx%d GPU buffer", dims.iVolX, dims.iVolY, dims.iVolZ); volData.ptr = 0; - // TODO: return 0 somehow? } return volData; @@ -65,12 +62,9 @@ cudaPitchedPtr allocateProjectionData(const SDimensions3D& dims) cudaPitchedPtr projData; - cudaError err = cudaMalloc3D(&projData, extentP); - if (err != cudaSuccess) { - astraCUDA::reportCudaError(err); + if (!checkCuda(cudaMalloc3D(&projData, extentP), "allocateProjectionData 3D")) { ASTRA_ERROR("Failed to allocate %dx%dx%d GPU buffer", dims.iProjU, dims.iProjAngles, dims.iProjV); projData.ptr = 0; - // TODO: return 0 somehow? } return projData; @@ -78,11 +72,11 @@ cudaPitchedPtr allocateProjectionData(const SDimensions3D& dims) bool zeroVolumeData(cudaPitchedPtr& D_data, const SDimensions3D& dims) { char* t = (char*)D_data.ptr; - cudaError err; for (unsigned int z = 0; z < dims.iVolZ; ++z) { - err = cudaMemset2D(t, D_data.pitch, 0, dims.iVolX*sizeof(float), dims.iVolY); - ASTRA_CUDA_ASSERT(err); + if (!checkCuda(cudaMemset2D(t, D_data.pitch, 0, dims.iVolX*sizeof(float), dims.iVolY), "zeroVolumeData 3D")) { + return false; + } t += D_data.pitch * dims.iVolY; } return true; @@ -90,11 +84,11 @@ bool zeroVolumeData(cudaPitchedPtr& D_data, const SDimensions3D& dims) bool zeroProjectionData(cudaPitchedPtr& D_data, const SDimensions3D& dims) { char* t = (char*)D_data.ptr; - cudaError err; for (unsigned int z = 0; z < dims.iProjV; ++z) { - err = cudaMemset2D(t, D_data.pitch, 0, dims.iProjU*sizeof(float), dims.iProjAngles); - ASTRA_CUDA_ASSERT(err); + if (!checkCuda(cudaMemset2D(t, D_data.pitch, 0, dims.iProjU*sizeof(float), dims.iProjAngles), "zeroProjectionData 3D")) { + return false; + } t += D_data.pitch * dims.iProjAngles; } @@ -128,11 +122,7 @@ bool copyVolumeToDevice(const float* data, cudaPitchedPtr& D_data, const SDimens p.extent = extentV; p.kind = cudaMemcpyHostToDevice; - cudaError err; - err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - - return err == cudaSuccess; + return checkCuda(cudaMemcpy3D(&p), "copyVolumeToDevice 3D"); } bool copyProjectionsToDevice(const float* data, cudaPitchedPtr& D_data, const SDimensions3D& dims, unsigned int pitch) @@ -163,11 +153,7 @@ bool copyProjectionsToDevice(const float* data, cudaPitchedPtr& D_data, const SD p.extent = extentV; p.kind = cudaMemcpyHostToDevice; - cudaError err; - err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - - return err == cudaSuccess; + return checkCuda(cudaMemcpy3D(&p), "copyProjectionsToDevice 3D"); } bool copyVolumeFromDevice(float* data, const cudaPitchedPtr& D_data, const SDimensions3D& dims, unsigned int pitch) @@ -198,12 +184,9 @@ bool copyVolumeFromDevice(float* data, const cudaPitchedPtr& D_data, const SDime p.extent = extentV; p.kind = cudaMemcpyDeviceToHost; - cudaError err; - err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - - return err == cudaSuccess; + return checkCuda(cudaMemcpy3D(&p), "copyVolumeFromDevice 3D"); } + bool copyProjectionsFromDevice(float* data, const cudaPitchedPtr& D_data, const SDimensions3D& dims, unsigned int pitch) { if (!pitch) @@ -232,11 +215,7 @@ bool copyProjectionsFromDevice(float* data, const cudaPitchedPtr& D_data, const p.extent = extentV; p.kind = cudaMemcpyDeviceToHost; - cudaError err; - err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - - return err == cudaSuccess; + return checkCuda(cudaMemcpy3D(&p), "copyProjectionsFromDevice 3D"); } bool duplicateVolumeData(cudaPitchedPtr& D_dst, const cudaPitchedPtr& D_src, const SDimensions3D& dims) @@ -258,12 +237,9 @@ bool duplicateVolumeData(cudaPitchedPtr& D_dst, const cudaPitchedPtr& D_src, con p.extent = extentV; p.kind = cudaMemcpyDeviceToDevice; - cudaError err; - err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - - return err == cudaSuccess; + return checkCuda(cudaMemcpy3D(&p), "duplicateVolumeData 3D"); } + bool duplicateProjectionData(cudaPitchedPtr& D_dst, const cudaPitchedPtr& D_src, const SDimensions3D& dims) { cudaExtent extentV; @@ -283,11 +259,7 @@ bool duplicateProjectionData(cudaPitchedPtr& D_dst, const cudaPitchedPtr& D_src, p.extent = extentV; p.kind = cudaMemcpyDeviceToDevice; - cudaError err; - err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - - return err == cudaSuccess; + return checkCuda(cudaMemcpy3D(&p), "duplicateProjectionData 3D"); } @@ -303,9 +275,8 @@ cudaArray* allocateVolumeArray(const SDimensions3D& dims) extentA.width = dims.iVolX; extentA.height = dims.iVolY; extentA.depth = dims.iVolZ; - cudaError err = cudaMalloc3DArray(&cuArray, &channelDesc, extentA); - if (err != cudaSuccess) { - astraCUDA::reportCudaError(err); + + if (!checkCuda(cudaMalloc3DArray(&cuArray, &channelDesc, extentA), "allocateVolumeArray 3D")) { ASTRA_ERROR("Failed to allocate %dx%dx%d GPU array", dims.iVolX, dims.iVolY, dims.iVolZ); return 0; } @@ -320,10 +291,8 @@ cudaArray* allocateProjectionArray(const SDimensions3D& dims) extentA.width = dims.iProjU; extentA.height = dims.iProjAngles; extentA.depth = dims.iProjV; - cudaError err = cudaMalloc3DArray(&cuArray, &channelDesc, extentA); - if (err != cudaSuccess) { - astraCUDA::reportCudaError(err); + if (!checkCuda(cudaMalloc3DArray(&cuArray, &channelDesc, extentA), "allocateProjectionArray 3D")) { ASTRA_ERROR("Failed to allocate %dx%dx%d GPU array", dims.iProjU, dims.iProjAngles, dims.iProjV); return 0; } @@ -352,12 +321,9 @@ bool transferVolumeToArray(cudaPitchedPtr D_volumeData, cudaArray* array, const p.extent = extentA; p.kind = cudaMemcpyDeviceToDevice; - cudaError err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - // TODO: check errors - - return true; + return checkCuda(cudaMemcpy3D(&p), "transferVolumeToArray 3D"); } + bool transferProjectionsToArray(cudaPitchedPtr D_projData, cudaArray* array, const SDimensions3D& dims) { cudaExtent extentA; @@ -379,13 +345,9 @@ bool transferProjectionsToArray(cudaPitchedPtr D_projData, cudaArray* array, con p.extent = extentA; p.kind = cudaMemcpyDeviceToDevice; - cudaError err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - - // TODO: check errors - - return true; + return checkCuda(cudaMemcpy3D(&p), "transferProjectionsToArray 3D"); } + bool transferHostProjectionsToArray(const float *projData, cudaArray* array, const SDimensions3D& dims) { cudaExtent extentA; @@ -413,12 +375,7 @@ bool transferHostProjectionsToArray(const float *projData, cudaArray* array, con p.extent = extentA; p.kind = cudaMemcpyHostToDevice; - cudaError err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - - // TODO: check errors - - return true; + return checkCuda(cudaMemcpy3D(&p), "transferHostProjectionsToArray 3D"); } @@ -430,18 +387,6 @@ float dotProduct3D(cudaPitchedPtr data, unsigned int x, unsigned int y, } -bool cudaTextForceKernelsCompletion() -{ - cudaError_t returnedCudaError = cudaThreadSynchronize(); - - if(returnedCudaError != cudaSuccess) { - ASTRA_ERROR("Failed to force completion of cuda kernels: %d: %s.", returnedCudaError, cudaGetErrorString(returnedCudaError)); - return false; - } - - return true; -} - int calcNextPowerOfTwo(int _iValue) { int iOutput = 1; |