summaryrefslogtreecommitdiffstats
path: root/cuda/2d
diff options
context:
space:
mode:
Diffstat (limited to 'cuda/2d')
-rw-r--r--cuda/2d/algo.cu4
-rw-r--r--cuda/2d/arith.cu18
-rw-r--r--cuda/2d/astra.cu4
-rw-r--r--cuda/2d/cgls.cu4
-rw-r--r--cuda/2d/darthelper.cu4
-rw-r--r--cuda/2d/em.cu4
-rw-r--r--cuda/2d/fan_bp.cu19
-rw-r--r--cuda/2d/fan_fp.cu15
-rw-r--r--cuda/2d/fbp.cu4
-rw-r--r--cuda/2d/fft.cu128
-rw-r--r--cuda/2d/par_bp.cu14
-rw-r--r--cuda/2d/par_fp.cu23
-rw-r--r--cuda/2d/sart.cu6
-rw-r--r--cuda/2d/sirt.cu4
-rw-r--r--cuda/2d/util.cu73
15 files changed, 140 insertions, 184 deletions
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu
index be15b25..c211729 100644
--- a/cuda/2d/algo.cu
+++ b/cuda/2d/algo.cu
@@ -1,7 +1,7 @@
/*
-----------------------------------------------------------------------
-Copyright: 2010-2018, imec Vision Lab, University of Antwerp
- 2014-2018, CWI, Amsterdam
+Copyright: 2010-2021, imec Vision Lab, University of Antwerp
+ 2014-2021, CWI, Amsterdam
Contact: astra@astra-toolbox.com
Website: http://www.astra-toolbox.com/
diff --git a/cuda/2d/arith.cu b/cuda/2d/arith.cu
index 62eb8c2..45622d0 100644
--- a/cuda/2d/arith.cu
+++ b/cuda/2d/arith.cu
@@ -1,7 +1,7 @@
/*
-----------------------------------------------------------------------
-Copyright: 2010-2018, imec Vision Lab, University of Antwerp
- 2014-2018, CWI, Amsterdam
+Copyright: 2010-2021, imec Vision Lab, University of Antwerp
+ 2014-2021, CWI, Amsterdam
Contact: astra@astra-toolbox.com
Website: http://www.astra-toolbox.com/
@@ -451,7 +451,7 @@ void processData(float* pfOut, unsigned int pitch, unsigned int width, unsigned
devtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pitch, width, height);
- cudaTextForceKernelsCompletion();
+ checkCuda(cudaThreadSynchronize(), __FUNCTION__);
}
template<typename op>
@@ -462,7 +462,7 @@ void processData(float* pfOut, float fParam, unsigned int pitch, unsigned int wi
devFtoD<op, 32><<<gridSize, blockSize>>>(pfOut, fParam, pitch, width, height);
- cudaTextForceKernelsCompletion();
+ checkCuda(cudaThreadSynchronize(), __FUNCTION__);
}
template<typename op>
@@ -473,7 +473,7 @@ void processData(float* pfOut1, float* pfOut2, float fParam1, float fParam2, uns
devFFtoDD<op, 32><<<gridSize, blockSize>>>(pfOut1, pfOut2, fParam1, fParam2, pitch, width, height);
- cudaTextForceKernelsCompletion();
+ checkCuda(cudaThreadSynchronize(), __FUNCTION__);
}
@@ -485,7 +485,7 @@ void processData(float* pfOut, const float* pfIn, unsigned int pitch, unsigned i
devDtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn, pitch, width, height);
- cudaTextForceKernelsCompletion();
+ checkCuda(cudaThreadSynchronize(), __FUNCTION__);
}
template<typename op>
@@ -496,7 +496,7 @@ void processData(float* pfOut, const float* pfIn, float fParam, unsigned int pit
devDFtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn, fParam, pitch, width, height);
- cudaTextForceKernelsCompletion();
+ checkCuda(cudaThreadSynchronize(), __FUNCTION__);
}
template<typename op>
@@ -507,7 +507,7 @@ void processData(float* pfOut, const float* pfIn1, const float* pfIn2, float fPa
devDDFtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn1, pfIn2, fParam, pitch, width, height);
- cudaTextForceKernelsCompletion();
+ checkCuda(cudaThreadSynchronize(), __FUNCTION__);
}
template<typename op>
@@ -518,7 +518,7 @@ void processData(float* pfOut, const float* pfIn1, const float* pfIn2, unsigned
devDDtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn1, pfIn2, pitch, width, height);
- cudaTextForceKernelsCompletion();
+ checkCuda(cudaThreadSynchronize(), __FUNCTION__);
}
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu
index 7ff1c95..4752b5f 100644
--- a/cuda/2d/astra.cu
+++ b/cuda/2d/astra.cu
@@ -1,7 +1,7 @@
/*
-----------------------------------------------------------------------
-Copyright: 2010-2018, imec Vision Lab, University of Antwerp
- 2014-2018, CWI, Amsterdam
+Copyright: 2010-2021, imec Vision Lab, University of Antwerp
+ 2014-2021, CWI, Amsterdam
Contact: astra@astra-toolbox.com
Website: http://www.astra-toolbox.com/
diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu
index e7238b9..9c2df68 100644
--- a/cuda/2d/cgls.cu
+++ b/cuda/2d/cgls.cu
@@ -1,7 +1,7 @@
/*
-----------------------------------------------------------------------
-Copyright: 2010-2018, imec Vision Lab, University of Antwerp
- 2014-2018, CWI, Amsterdam
+Copyright: 2010-2021, imec Vision Lab, University of Antwerp
+ 2014-2021, CWI, Amsterdam
Contact: astra@astra-toolbox.com
Website: http://www.astra-toolbox.com/
diff --git a/cuda/2d/darthelper.cu b/cuda/2d/darthelper.cu
index b466840..48ec4b0 100644
--- a/cuda/2d/darthelper.cu
+++ b/cuda/2d/darthelper.cu
@@ -1,7 +1,7 @@
/*
-----------------------------------------------------------------------
-Copyright: 2010-2018, imec Vision Lab, University of Antwerp
- 2014-2018, CWI, Amsterdam
+Copyright: 2010-2021, imec Vision Lab, University of Antwerp
+ 2014-2021, CWI, Amsterdam
Contact: astra@astra-toolbox.com
Website: http://www.astra-toolbox.com/
diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu
index df140ec..203032c 100644
--- a/cuda/2d/em.cu
+++ b/cuda/2d/em.cu
@@ -1,7 +1,7 @@
/*
-----------------------------------------------------------------------
-Copyright: 2010-2018, imec Vision Lab, University of Antwerp
- 2014-2018, CWI, Amsterdam
+Copyright: 2010-2021, imec Vision Lab, University of Antwerp
+ 2014-2021, CWI, Amsterdam
Contact: astra@astra-toolbox.com
Website: http://www.astra-toolbox.com/
diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu
index 7bba302..2068d03 100644
--- a/cuda/2d/fan_bp.cu
+++ b/cuda/2d/fan_bp.cu
@@ -1,7 +1,7 @@
/*
-----------------------------------------------------------------------
-Copyright: 2010-2018, imec Vision Lab, University of Antwerp
- 2014-2018, CWI, Amsterdam
+Copyright: 2010-2021, imec Vision Lab, University of Antwerp
+ 2014-2021, CWI, Amsterdam
Contact: astra@astra-toolbox.com
Website: http://www.astra-toolbox.com/
@@ -322,13 +322,12 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
else
devFanBP<false><<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
}
- cudaThreadSynchronize();
- cudaTextForceKernelsCompletion();
+ ok = checkCuda(cudaStreamSynchronize(stream), "FanBP");
cudaStreamDestroy(stream);
- return true;
+ return ok;
}
bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
@@ -354,13 +353,12 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
devFanBP<true><<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
}
- cudaThreadSynchronize();
- cudaTextForceKernelsCompletion();
+ ok = checkCuda(cudaStreamSynchronize(stream), "FanBP_FBPWeighted");
cudaStreamDestroy(stream);
- return true;
+ return ok;
}
// D_projData is a pointer to one padded sinogram line
@@ -382,11 +380,8 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
(dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize);
devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims, fOutputScale);
- cudaThreadSynchronize();
- cudaTextForceKernelsCompletion();
-
- return true;
+ return checkCuda(cudaThreadSynchronize(), "FanBP_SART");
}
bool FanBP(float* D_volumeData, unsigned int volumePitch,
diff --git a/cuda/2d/fan_fp.cu b/cuda/2d/fan_fp.cu
index 60c02f8..342ca4c 100644
--- a/cuda/2d/fan_fp.cu
+++ b/cuda/2d/fan_fp.cu
@@ -1,7 +1,7 @@
/*
-----------------------------------------------------------------------
-Copyright: 2010-2018, imec Vision Lab, University of Antwerp
- 2014-2018, CWI, Amsterdam
+Copyright: 2010-2021, imec Vision Lab, University of Antwerp
+ 2014-2021, CWI, Amsterdam
Contact: astra@astra-toolbox.com
Website: http://www.astra-toolbox.com/
@@ -268,16 +268,17 @@ bool FanFP_internal(float* D_volumeData, unsigned int volumePitch,
for (unsigned int i = 0; i < dims.iVolHeight; i += g_blockSlices)
FanFPvertical<<<dimGrid, dimBlock, 0, stream2>>>(D_projData, projPitch, i, blockStart, blockEnd, dims, outputScale);
- cudaStreamDestroy(stream1);
- cudaStreamDestroy(stream2);
+ bool ok = true;
- cudaThreadSynchronize();
+ ok &= checkCuda(cudaStreamSynchronize(stream1), "fan_fp hor");
+ cudaStreamDestroy(stream1);
- cudaTextForceKernelsCompletion();
+ ok &= checkCuda(cudaStreamSynchronize(stream2), "fan_fp ver");
+ cudaStreamDestroy(stream2);
cudaFreeArray(D_dataArray);
- return true;
+ return ok;
}
bool FanFP(float* D_volumeData, unsigned int volumePitch,
diff --git a/cuda/2d/fbp.cu b/cuda/2d/fbp.cu
index 4fc3983..7acbf6d 100644
--- a/cuda/2d/fbp.cu
+++ b/cuda/2d/fbp.cu
@@ -1,7 +1,7 @@
/*
-----------------------------------------------------------------------
-Copyright: 2010-2018, imec Vision Lab, University of Antwerp
- 2014-2018, CWI, Amsterdam
+Copyright: 2010-2021, imec Vision Lab, University of Antwerp
+ 2014-2021, CWI, Amsterdam
Contact: astra@astra-toolbox.com
Website: http://www.astra-toolbox.com/
diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu
index 8361ad2..08acfd4 100644
--- a/cuda/2d/fft.cu
+++ b/cuda/2d/fft.cu
@@ -1,7 +1,7 @@
/*
-----------------------------------------------------------------------
-Copyright: 2010-2018, imec Vision Lab, University of Antwerp
- 2014-2018, CWI, Amsterdam
+Copyright: 2010-2021, imec Vision Lab, University of Antwerp
+ 2014-2021, CWI, Amsterdam
Contact: astra@astra-toolbox.com
Website: http://www.astra-toolbox.com/
@@ -40,33 +40,18 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
using namespace astra;
-// TODO: evaluate what we want to do in these situations:
-
-#define CHECK_ERROR(errorMessage) do { \
- cudaError_t err = cudaThreadSynchronize(); \
- if( cudaSuccess != err) { \
- ASTRA_ERROR("Cuda error %s : %s", \
- errorMessage,cudaGetErrorString( err)); \
- exit(EXIT_FAILURE); \
- } } while (0)
-
-#define SAFE_CALL( call) do { \
- cudaError err = call; \
- if( cudaSuccess != err) { \
- ASTRA_ERROR("Cuda error: %s ", \
- cudaGetErrorString( err)); \
- exit(EXIT_FAILURE); \
- } \
- err = cudaThreadSynchronize(); \
- if( cudaSuccess != err) { \
- ASTRA_ERROR("Cuda error: %s : ", \
- cudaGetErrorString( err)); \
- exit(EXIT_FAILURE); \
- } } while (0)
-
-
namespace astraCUDA {
+bool checkCufft(cufftResult err, const char *msg)
+{
+ if (err != CUFFT_SUCCESS) {
+ ASTRA_ERROR("%s: CUFFT error %d.", msg, err);
+ return false;
+ } else {
+ return true;
+ }
+}
+
__global__ static void applyFilter_kernel(int _iProjectionCount,
int _iFreqBinCount,
cufftComplex * _pSinogram,
@@ -115,7 +100,8 @@ static void rescaleInverseFourier(int _iProjectionCount, int _iDetectorCount,
rescaleInverseFourier_kernel<<< iBlockCount, iBlockSize >>>(_iProjectionCount,
_iDetectorCount,
_pfInFourierOutput);
- CHECK_ERROR("rescaleInverseFourier_kernel failed");
+
+ checkCuda(cudaThreadSynchronize(), "rescaleInverseFourier");
}
void applyFilter(int _iProjectionCount, int _iFreqBinCount,
@@ -128,7 +114,8 @@ void applyFilter(int _iProjectionCount, int _iFreqBinCount,
applyFilter_kernel<<< iBlockCount, iBlockSize >>>(_iProjectionCount,
_iFreqBinCount,
_pSinogram, _pFilter);
- CHECK_ERROR("applyFilter_kernel failed");
+
+ checkCuda(cudaThreadSynchronize(), "applyFilter");
}
static bool invokeCudaFFT(int _iProjectionCount, int _iDetectorCount,
@@ -136,24 +123,22 @@ static bool invokeCudaFFT(int _iProjectionCount, int _iDetectorCount,
cufftComplex * _pDevTargetComplex)
{
cufftHandle plan;
- cufftResult result;
- result = cufftPlan1d(&plan, _iDetectorCount, CUFFT_R2C, _iProjectionCount);
- if(result != CUFFT_SUCCESS)
- {
- ASTRA_ERROR("Failed to plan 1d r2c fft");
+ if (!checkCufft(cufftPlan1d(&plan, _iDetectorCount, CUFFT_R2C, _iProjectionCount), "invokeCudaFFT plan")) {
return false;
}
- result = cufftExecR2C(plan, (cufftReal *)_pfDevSource, _pDevTargetComplex);
- cufftDestroy(plan);
+ if (!checkCufft(cufftExecR2C(plan, (cufftReal *)_pfDevSource, _pDevTargetComplex), "invokeCudaFFT exec")) {
+ cufftDestroy(plan);
+ return false;
+ }
- if(result != CUFFT_SUCCESS)
- {
- ASTRA_ERROR("Failed to exec 1d r2c fft");
+ if (!checkCuda(cudaDeviceSynchronize(), "invokeCudaFFT sync")) {
+ cufftDestroy(plan);
return false;
}
+ cufftDestroy(plan);
return true;
}
@@ -162,26 +147,25 @@ static bool invokeCudaIFFT(int _iProjectionCount, int _iDetectorCount,
float * _pfDevTarget)
{
cufftHandle plan;
- cufftResult result;
- result = cufftPlan1d(&plan, _iDetectorCount, CUFFT_C2R, _iProjectionCount);
- if(result != CUFFT_SUCCESS)
- {
- ASTRA_ERROR("Failed to plan 1d c2r fft");
+ if (!checkCufft(cufftPlan1d(&plan, _iDetectorCount, CUFFT_C2R, _iProjectionCount), "invokeCudaIFFT plan")) {
return false;
}
- // todo: why do we have to get rid of the const qualifier?
- result = cufftExecC2R(plan, (cufftComplex *)_pDevSourceComplex,
- (cufftReal *)_pfDevTarget);
- cufftDestroy(plan);
-
- if(result != CUFFT_SUCCESS)
+ // Getting rid of the const qualifier is due to cufft API issue?
+ if (!checkCufft(cufftExecC2R(plan, (cufftComplex *)_pDevSourceComplex,
+ (cufftReal *)_pfDevTarget), "invokeCudaIFFT exec"))
{
- ASTRA_ERROR("Failed to exec 1d c2r fft");
+ cufftDestroy(plan);
return false;
}
+ if (!checkCuda(cudaDeviceSynchronize(), "invokeCudaIFFT sync")) {
+ cufftDestroy(plan);
+ return false;
+ }
+
+ cufftDestroy(plan);
return true;
}
@@ -189,14 +173,12 @@ bool allocateComplexOnDevice(int _iProjectionCount, int _iDetectorCount,
cufftComplex ** _ppDevComplex)
{
size_t bufferSize = sizeof(cufftComplex) * _iProjectionCount * _iDetectorCount;
- SAFE_CALL(cudaMalloc((void **)_ppDevComplex, bufferSize));
- return true;
+ return checkCuda(cudaMalloc((void **)_ppDevComplex, bufferSize), "fft allocateComplexOnDevice");
}
bool freeComplexOnDevice(cufftComplex * _pDevComplex)
{
- SAFE_CALL(cudaFree(_pDevComplex));
- return true;
+ return checkCuda(cudaFree(_pDevComplex), "fft freeComplexOnDevice");
}
bool uploadComplexArrayToDevice(int _iProjectionCount, int _iDetectorCount,
@@ -204,9 +186,7 @@ bool uploadComplexArrayToDevice(int _iProjectionCount, int _iDetectorCount,
cufftComplex * _pDevComplexTarget)
{
size_t memSize = sizeof(cufftComplex) * _iProjectionCount * _iDetectorCount;
- SAFE_CALL(cudaMemcpy(_pDevComplexTarget, _pHostComplexSource, memSize, cudaMemcpyHostToDevice));
-
- return true;
+ return checkCuda(cudaMemcpy(_pDevComplexTarget, _pHostComplexSource, memSize, cudaMemcpyHostToDevice), "fft uploadComplexArrayToDevice");
}
bool runCudaFFT(int _iProjectionCount, const float * _pfDevRealSource,
@@ -217,25 +197,30 @@ bool runCudaFFT(int _iProjectionCount, const float * _pfDevRealSource,
float * pfDevRealFFTSource = NULL;
size_t bufferMemSize = sizeof(float) * _iProjectionCount * _iFFTRealDetectorCount;
- SAFE_CALL(cudaMalloc((void **)&pfDevRealFFTSource, bufferMemSize));
- SAFE_CALL(cudaMemset(pfDevRealFFTSource, 0, bufferMemSize));
+ if (!checkCuda(cudaMalloc((void **)&pfDevRealFFTSource, bufferMemSize), "runCudaFFT malloc"))
+ return false;
+ if (!checkCuda(cudaMemset(pfDevRealFFTSource, 0, bufferMemSize), "runCudaFFT memset")) {
+ cudaFree(pfDevRealFFTSource);
+ return false;
+ }
for(int iProjectionIndex = 0; iProjectionIndex < _iProjectionCount; iProjectionIndex++)
{
const float * pfSourceLocation = _pfDevRealSource + iProjectionIndex * _iSourcePitch;
float * pfTargetLocation = pfDevRealFFTSource + iProjectionIndex * _iFFTRealDetectorCount;
- SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice));
+ if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaFFT memcpy")) {
+ cudaFree(pfDevRealFFTSource);
+ return false;
+ }
}
bool bResult = invokeCudaFFT(_iProjectionCount, _iFFTRealDetectorCount,
pfDevRealFFTSource, _pDevTargetComplex);
if(!bResult)
- {
return false;
- }
- SAFE_CALL(cudaFree(pfDevRealFFTSource));
+ cudaFree(pfDevRealFFTSource);
return true;
}
@@ -248,7 +233,8 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,
float * pfDevRealFFTTarget = NULL;
size_t bufferMemSize = sizeof(float) * _iProjectionCount * _iFFTRealDetectorCount;
- SAFE_CALL(cudaMalloc((void **)&pfDevRealFFTTarget, bufferMemSize));
+ if (!checkCuda(cudaMalloc((void **)&pfDevRealFFTTarget, bufferMemSize), "runCudaIFFT malloc"))
+ return false;
bool bResult = invokeCudaIFFT(_iProjectionCount, _iFFTRealDetectorCount,
_pDevSourceComplex, pfDevRealFFTTarget);
@@ -260,17 +246,23 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,
rescaleInverseFourier(_iProjectionCount, _iFFTRealDetectorCount,
pfDevRealFFTTarget);
- SAFE_CALL(cudaMemset(_pfRealTarget, 0, sizeof(float) * _iProjectionCount * _iTargetPitch));
+ if (!checkCuda(cudaMemset(_pfRealTarget, 0, sizeof(float) * _iProjectionCount * _iTargetPitch), "runCudaIFFT memset")) {
+ cudaFree(pfDevRealFFTTarget);
+ return false;
+ }
for(int iProjectionIndex = 0; iProjectionIndex < _iProjectionCount; iProjectionIndex++)
{
const float * pfSourceLocation = pfDevRealFFTTarget + iProjectionIndex * _iFFTRealDetectorCount;
float* pfTargetLocation = _pfRealTarget + iProjectionIndex * _iTargetPitch;
- SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice));
+ if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaIFFT memcpy")) {
+ cudaFree(pfDevRealFFTTarget);
+ return false;
+ }
}
- SAFE_CALL(cudaFree(pfDevRealFFTTarget));
+ cudaFree(pfDevRealFFTTarget);
return true;
}
diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu
index f080abb..d7c3ab0 100644
--- a/cuda/2d/par_bp.cu
+++ b/cuda/2d/par_bp.cu
@@ -1,7 +1,7 @@
/*
-----------------------------------------------------------------------
-Copyright: 2010-2018, imec Vision Lab, University of Antwerp
- 2014-2018, CWI, Amsterdam
+Copyright: 2010-2021, imec Vision Lab, University of Antwerp
+ 2014-2021, CWI, Amsterdam
Contact: astra@astra-toolbox.com
Website: http://www.astra-toolbox.com/
@@ -231,13 +231,12 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
else
devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
}
- cudaThreadSynchronize();
- cudaTextForceKernelsCompletion();
+ bool ok = checkCuda(cudaStreamSynchronize(stream), "par_bp");
cudaStreamDestroy(stream);
- return true;
+ return ok;
}
bool BP(float* D_volumeData, unsigned int volumePitch,
@@ -284,11 +283,8 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch,
(dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize);
devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, angle_offset, angle_scaled_sin, angle_scaled_cos, dims, fOutputScale);
- cudaThreadSynchronize();
-
- cudaTextForceKernelsCompletion();
- return true;
+ return checkCuda(cudaThreadSynchronize(), "BP_SART");
}
diff --git a/cuda/2d/par_fp.cu b/cuda/2d/par_fp.cu
index aac6cc3..e947428 100644
--- a/cuda/2d/par_fp.cu
+++ b/cuda/2d/par_fp.cu
@@ -1,7 +1,7 @@
/*
-----------------------------------------------------------------------
-Copyright: 2010-2018, imec Vision Lab, University of Antwerp
- 2014-2018, CWI, Amsterdam
+Copyright: 2010-2021, imec Vision Lab, University of Antwerp
+ 2014-2021, CWI, Amsterdam
Contact: astra@astra-toolbox.com
Website: http://www.astra-toolbox.com/
@@ -305,8 +305,8 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch,
dim3 dimGrid((blockEnd-blockStart+g_anglesPerBlock-1)/g_anglesPerBlock,
(dims.iProjDets+g_detBlockSize-1)/g_detBlockSize); // angle blocks, detector blocks
- // TODO: check if we can't immediately
- // destroy the stream after use
+ // TODO: consider limiting number of handle (chaotic) geoms
+ // with many alternating directions
cudaStream_t stream;
cudaStreamCreate(&stream);
streams.push_back(stream);
@@ -323,19 +323,16 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch,
}
}
- for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter)
- cudaStreamDestroy(*iter);
-
- streams.clear();
-
- cudaThreadSynchronize();
+ bool ok = true;
- cudaTextForceKernelsCompletion();
+ for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter) {
+ ok &= checkCuda(cudaStreamSynchronize(*iter), "par_fp");
+ cudaStreamDestroy(*iter);
+ }
cudaFreeArray(D_dataArray);
-
- return true;
+ return ok;
}
bool FP_simple(float* D_volumeData, unsigned int volumePitch,
diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu
index 12ad6df..89d58c2 100644
--- a/cuda/2d/sart.cu
+++ b/cuda/2d/sart.cu
@@ -1,7 +1,7 @@
/*
-----------------------------------------------------------------------
-Copyright: 2010-2018, imec Vision Lab, University of Antwerp
- 2014-2018, CWI, Amsterdam
+Copyright: 2010-2021, imec Vision Lab, University of Antwerp
+ 2014-2021, CWI, Amsterdam
Contact: astra@astra-toolbox.com
Website: http://www.astra-toolbox.com/
@@ -54,7 +54,7 @@ void MUL_SART(float* pfOut, const float* pfIn, unsigned int pitch, unsigned int
devMUL_SART<<<gridSize, blockSize>>>(pfOut, pfIn, pitch, width);
- cudaTextForceKernelsCompletion();
+ checkCuda(cudaThreadSynchronize(), "MUL_SART");
}
diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu
index 2c5fdc9..b251734 100644
--- a/cuda/2d/sirt.cu
+++ b/cuda/2d/sirt.cu
@@ -1,7 +1,7 @@
/*
-----------------------------------------------------------------------
-Copyright: 2010-2018, imec Vision Lab, University of Antwerp
- 2014-2018, CWI, Amsterdam
+Copyright: 2010-2021, imec Vision Lab, University of Antwerp
+ 2014-2021, CWI, Amsterdam
Contact: astra@astra-toolbox.com
Website: http://www.astra-toolbox.com/
diff --git a/cuda/2d/util.cu b/cuda/2d/util.cu
index 533b86c..ac360f0 100644
--- a/cuda/2d/util.cu
+++ b/cuda/2d/util.cu
@@ -1,7 +1,7 @@
/*
-----------------------------------------------------------------------
-Copyright: 2010-2018, imec Vision Lab, University of Antwerp
- 2014-2018, CWI, Amsterdam
+Copyright: 2010-2021, imec Vision Lab, University of Antwerp
+ 2014-2021, CWI, Amsterdam
Contact: astra@astra-toolbox.com
Website: http://www.astra-toolbox.com/
@@ -40,12 +40,8 @@ bool copyVolumeToDevice(const float* in_data, unsigned int in_pitch,
{
size_t width = dims.iVolWidth;
size_t height = dims.iVolHeight;
- // TODO: memory order
- cudaError_t err;
- err = cudaMemcpy2D(outD_data, sizeof(float)*out_pitch, in_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyHostToDevice);
- ASTRA_CUDA_ASSERT(err);
- assert(err == cudaSuccess);
- return true;
+
+ return checkCuda(cudaMemcpy2D(outD_data, sizeof(float)*out_pitch, in_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyHostToDevice), "copyVolumeToDevice");
}
bool copyVolumeFromDevice(float* out_data, unsigned int out_pitch,
@@ -54,10 +50,8 @@ bool copyVolumeFromDevice(float* out_data, unsigned int out_pitch,
{
size_t width = dims.iVolWidth;
size_t height = dims.iVolHeight;
- // TODO: memory order
- cudaError_t err = cudaMemcpy2D(out_data, sizeof(float)*out_pitch, inD_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyDeviceToHost);
- ASTRA_CUDA_ASSERT(err);
- return true;
+
+ return checkCuda(cudaMemcpy2D(out_data, sizeof(float)*out_pitch, inD_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyDeviceToHost), "copyVolumeFromDevice");
}
@@ -67,10 +61,8 @@ bool copySinogramFromDevice(float* out_data, unsigned int out_pitch,
{
size_t width = dims.iProjDets;
size_t height = dims.iProjAngles;
- // TODO: memory order
- cudaError_t err = cudaMemcpy2D(out_data, sizeof(float)*out_pitch, inD_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyDeviceToHost);
- ASTRA_CUDA_ASSERT(err);
- return true;
+
+ return checkCuda(cudaMemcpy2D(out_data, sizeof(float)*out_pitch, inD_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyDeviceToHost), "copySinogramFromDevice");
}
bool copySinogramToDevice(const float* in_data, unsigned int in_pitch,
@@ -79,20 +71,15 @@ bool copySinogramToDevice(const float* in_data, unsigned int in_pitch,
{
size_t width = dims.iProjDets;
size_t height = dims.iProjAngles;
- // TODO: memory order
- cudaError_t err;
- err = cudaMemcpy2D(outD_data, sizeof(float)*out_pitch, in_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyHostToDevice);
- ASTRA_CUDA_ASSERT(err);
- return true;
+
+ return checkCuda(cudaMemcpy2D(outD_data, sizeof(float)*out_pitch, in_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyHostToDevice), "copySinogramToDevice");
}
bool allocateVolume(float*& ptr, unsigned int width, unsigned int height, unsigned int& pitch)
{
size_t p;
- cudaError_t ret = cudaMallocPitch((void**)&ptr, &p, sizeof(float)*width, height);
- if (ret != cudaSuccess) {
- reportCudaError(ret);
+ if (!checkCuda(cudaMallocPitch((void**)&ptr, &p, sizeof(float)*width, height), "allocateVolume")) {
ASTRA_ERROR("Failed to allocate %dx%d GPU buffer", width, height);
return false;
}
@@ -104,11 +91,9 @@ bool allocateVolume(float*& ptr, unsigned int width, unsigned int height, unsign
return true;
}
-void zeroVolume(float* data, unsigned int pitch, unsigned int width, unsigned int height)
+bool zeroVolume(float* data, unsigned int pitch, unsigned int width, unsigned int height)
{
- cudaError_t err;
- err = cudaMemset2D(data, sizeof(float)*pitch, 0, sizeof(float)*width, height);
- ASTRA_CUDA_ASSERT(err);
+ return checkCuda(cudaMemset2D(data, sizeof(float)*pitch, 0, sizeof(float)*width, height), "zeroVolume");
}
bool allocateVolumeData(float*& D_ptr, unsigned int& pitch, const SDimensions& dims)
@@ -121,14 +106,14 @@ bool allocateProjectionData(float*& D_ptr, unsigned int& pitch, const SDimension
return allocateVolume(D_ptr, dims.iProjDets, dims.iProjAngles, pitch);
}
-void zeroVolumeData(float* D_ptr, unsigned int pitch, const SDimensions& dims)
+bool zeroVolumeData(float* D_ptr, unsigned int pitch, const SDimensions& dims)
{
- zeroVolume(D_ptr, pitch, dims.iVolWidth, dims.iVolHeight);
+ return zeroVolume(D_ptr, pitch, dims.iVolWidth, dims.iVolHeight);
}
-void zeroProjectionData(float* D_ptr, unsigned int pitch, const SDimensions& dims)
+bool zeroProjectionData(float* D_ptr, unsigned int pitch, const SDimensions& dims)
{
- zeroVolume(D_ptr, pitch, dims.iProjDets, dims.iProjAngles);
+ return zeroVolume(D_ptr, pitch, dims.iProjDets, dims.iProjAngles);
}
void duplicateVolumeData(float* D_dst, float* D_src, unsigned int pitch, const SDimensions& dims)
@@ -231,7 +216,7 @@ float dotProduct2D(float* D_data, unsigned int pitch,
// Step 1: reduce 2D from image to a single vector, taking sum of squares
reduce2D<<< dimGrid2, dimBlock2, shared_mem2>>>(D_data, D_buf, pitch, width, height);
- cudaTextForceKernelsCompletion();
+ checkCuda(cudaThreadSynchronize(), "dotProduct2D reduce2D");
// Step 2: reduce 1D: add up elements in vector
if (bx * by > 512)
@@ -248,31 +233,21 @@ float dotProduct2D(float* D_data, unsigned int pitch,
float x;
cudaMemcpy(&x, D_res, 4, cudaMemcpyDeviceToHost);
- cudaTextForceKernelsCompletion();
+ checkCuda(cudaThreadSynchronize(), "dotProduct2D");
cudaFree(D_buf);
return x;
}
-
-bool cudaTextForceKernelsCompletion()
+bool checkCuda(cudaError_t err, const char *msg)
{
- cudaError_t returnedCudaError = cudaThreadSynchronize();
-
- if(returnedCudaError != cudaSuccess) {
- ASTRA_ERROR("Failed to force completion of cuda kernels: %d: %s.", returnedCudaError, cudaGetErrorString(returnedCudaError));
+ if (err != cudaSuccess) {
+ ASTRA_ERROR("%s: CUDA error %d: %s.", msg, err, cudaGetErrorString(err));
return false;
+ } else {
+ return true;
}
-
- return true;
}
-void reportCudaError(cudaError_t err)
-{
- if(err != cudaSuccess)
- ASTRA_ERROR("CUDA error %d: %s.", err, cudaGetErrorString(err));
-}
-
-
}