From 1875e824a0358a7e7510b31f5e87708b304652bc Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Tue, 16 Nov 2021 11:38:02 +0100 Subject: Remove reportCudaError function --- cuda/3d/util3d.cu | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'cuda/3d') diff --git a/cuda/3d/util3d.cu b/cuda/3d/util3d.cu index 844b880..8b66432 100644 --- a/cuda/3d/util3d.cu +++ b/cuda/3d/util3d.cu @@ -46,12 +46,9 @@ cudaPitchedPtr allocateVolumeData(const SDimensions3D& dims) cudaPitchedPtr volData; - cudaError err = cudaMalloc3D(&volData, extentV); - if (err != cudaSuccess) { - astraCUDA::reportCudaError(err); + if (!checkCuda(cudaMalloc3D(&volData, extentV), "allocateVolumeData 3D")) { ASTRA_ERROR("Failed to allocate %dx%dx%d GPU buffer", dims.iVolX, dims.iVolY, dims.iVolZ); volData.ptr = 0; - // TODO: return 0 somehow? } return volData; @@ -65,12 +62,9 @@ cudaPitchedPtr allocateProjectionData(const SDimensions3D& dims) cudaPitchedPtr projData; - cudaError err = cudaMalloc3D(&projData, extentP); - if (err != cudaSuccess) { - astraCUDA::reportCudaError(err); + if (!checkCuda(cudaMalloc3D(&projData, extentP), "allocateProjectionData 3D")) { ASTRA_ERROR("Failed to allocate %dx%dx%d GPU buffer", dims.iProjU, dims.iProjAngles, dims.iProjV); projData.ptr = 0; - // TODO: return 0 somehow? 
} return projData; @@ -303,9 +297,8 @@ cudaArray* allocateVolumeArray(const SDimensions3D& dims) extentA.width = dims.iVolX; extentA.height = dims.iVolY; extentA.depth = dims.iVolZ; - cudaError err = cudaMalloc3DArray(&cuArray, &channelDesc, extentA); - if (err != cudaSuccess) { - astraCUDA::reportCudaError(err); + + if (!checkCuda(cudaMalloc3DArray(&cuArray, &channelDesc, extentA), "allocateVolumeArray 3D")) { ASTRA_ERROR("Failed to allocate %dx%dx%d GPU array", dims.iVolX, dims.iVolY, dims.iVolZ); return 0; } @@ -320,10 +313,8 @@ cudaArray* allocateProjectionArray(const SDimensions3D& dims) extentA.width = dims.iProjU; extentA.height = dims.iProjAngles; extentA.depth = dims.iProjV; - cudaError err = cudaMalloc3DArray(&cuArray, &channelDesc, extentA); - if (err != cudaSuccess) { - astraCUDA::reportCudaError(err); + if (!checkCuda(cudaMalloc3DArray(&cuArray, &channelDesc, extentA), "allocateProjectionArray 3D")) { ASTRA_ERROR("Failed to allocate %dx%dx%d GPU array", dims.iProjU, dims.iProjAngles, dims.iProjV); return 0; } -- cgit v1.2.3 From b492e3d049e300132d2f22eee7922ff308342a84 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Tue, 16 Nov 2021 11:46:17 +0100 Subject: Remove ASTRA_CUDA_ASSERT --- cuda/3d/util3d.cu | 72 +++++++++++++++---------------------------------------- 1 file changed, 19 insertions(+), 53 deletions(-) (limited to 'cuda/3d') diff --git a/cuda/3d/util3d.cu b/cuda/3d/util3d.cu index 8b66432..4f5d134 100644 --- a/cuda/3d/util3d.cu +++ b/cuda/3d/util3d.cu @@ -72,11 +72,11 @@ cudaPitchedPtr allocateProjectionData(const SDimensions3D& dims) bool zeroVolumeData(cudaPitchedPtr& D_data, const SDimensions3D& dims) { char* t = (char*)D_data.ptr; - cudaError err; for (unsigned int z = 0; z < dims.iVolZ; ++z) { - err = cudaMemset2D(t, D_data.pitch, 0, dims.iVolX*sizeof(float), dims.iVolY); - ASTRA_CUDA_ASSERT(err); + if (!checkCuda(cudaMemset2D(t, D_data.pitch, 0, dims.iVolX*sizeof(float), dims.iVolY), "zeroVolumeData 3D")) { + return false; + } 
t += D_data.pitch * dims.iVolY; } return true; @@ -84,11 +84,11 @@ bool zeroVolumeData(cudaPitchedPtr& D_data, const SDimensions3D& dims) bool zeroProjectionData(cudaPitchedPtr& D_data, const SDimensions3D& dims) { char* t = (char*)D_data.ptr; - cudaError err; for (unsigned int z = 0; z < dims.iProjV; ++z) { - err = cudaMemset2D(t, D_data.pitch, 0, dims.iProjU*sizeof(float), dims.iProjAngles); - ASTRA_CUDA_ASSERT(err); + if (!checkCuda(cudaMemset2D(t, D_data.pitch, 0, dims.iProjU*sizeof(float), dims.iProjAngles), "zeroProjectionData 3D")) { + return false; + } t += D_data.pitch * dims.iProjAngles; } @@ -122,11 +122,7 @@ bool copyVolumeToDevice(const float* data, cudaPitchedPtr& D_data, const SDimens p.extent = extentV; p.kind = cudaMemcpyHostToDevice; - cudaError err; - err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - - return err == cudaSuccess; + return checkCuda(cudaMemcpy3D(&p), "copyVolumeToDevice 3D"); } bool copyProjectionsToDevice(const float* data, cudaPitchedPtr& D_data, const SDimensions3D& dims, unsigned int pitch) @@ -157,11 +153,7 @@ bool copyProjectionsToDevice(const float* data, cudaPitchedPtr& D_data, const SD p.extent = extentV; p.kind = cudaMemcpyHostToDevice; - cudaError err; - err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - - return err == cudaSuccess; + return checkCuda(cudaMemcpy3D(&p), "copyProjectionsToDevice 3D"); } bool copyVolumeFromDevice(float* data, const cudaPitchedPtr& D_data, const SDimensions3D& dims, unsigned int pitch) @@ -192,12 +184,9 @@ bool copyVolumeFromDevice(float* data, const cudaPitchedPtr& D_data, const SDime p.extent = extentV; p.kind = cudaMemcpyDeviceToHost; - cudaError err; - err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - - return err == cudaSuccess; + return checkCuda(cudaMemcpy3D(&p), "copyVolumeFromDevice 3D"); } + bool copyProjectionsFromDevice(float* data, const cudaPitchedPtr& D_data, const SDimensions3D& dims, unsigned int pitch) { if (!pitch) @@ -226,11 +215,7 @@ bool 
copyProjectionsFromDevice(float* data, const cudaPitchedPtr& D_data, const p.extent = extentV; p.kind = cudaMemcpyDeviceToHost; - cudaError err; - err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - - return err == cudaSuccess; + return checkCuda(cudaMemcpy3D(&p), "copyProjectionsFromDevice 3D"); } bool duplicateVolumeData(cudaPitchedPtr& D_dst, const cudaPitchedPtr& D_src, const SDimensions3D& dims) @@ -252,12 +237,9 @@ bool duplicateVolumeData(cudaPitchedPtr& D_dst, const cudaPitchedPtr& D_src, con p.extent = extentV; p.kind = cudaMemcpyDeviceToDevice; - cudaError err; - err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - - return err == cudaSuccess; + return checkCuda(cudaMemcpy3D(&p), "duplicateVolumeData 3D"); } + bool duplicateProjectionData(cudaPitchedPtr& D_dst, const cudaPitchedPtr& D_src, const SDimensions3D& dims) { cudaExtent extentV; @@ -277,11 +259,7 @@ bool duplicateProjectionData(cudaPitchedPtr& D_dst, const cudaPitchedPtr& D_src, p.extent = extentV; p.kind = cudaMemcpyDeviceToDevice; - cudaError err; - err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - - return err == cudaSuccess; + return checkCuda(cudaMemcpy3D(&p), "duplicateProjectionData 3D"); } @@ -343,12 +321,9 @@ bool transferVolumeToArray(cudaPitchedPtr D_volumeData, cudaArray* array, const p.extent = extentA; p.kind = cudaMemcpyDeviceToDevice; - cudaError err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - // TODO: check errors - - return true; + return checkCuda(cudaMemcpy3D(&p), "transferVolumeToArray 3D"); } + bool transferProjectionsToArray(cudaPitchedPtr D_projData, cudaArray* array, const SDimensions3D& dims) { cudaExtent extentA; @@ -370,13 +345,9 @@ bool transferProjectionsToArray(cudaPitchedPtr D_projData, cudaArray* array, con p.extent = extentA; p.kind = cudaMemcpyDeviceToDevice; - cudaError err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - - // TODO: check errors - - return true; + return checkCuda(cudaMemcpy3D(&p), "transferProjectionsToArray 3D"); } + bool 
transferHostProjectionsToArray(const float *projData, cudaArray* array, const SDimensions3D& dims) { cudaExtent extentA; @@ -404,12 +375,7 @@ bool transferHostProjectionsToArray(const float *projData, cudaArray* array, con p.extent = extentA; p.kind = cudaMemcpyHostToDevice; - cudaError err = cudaMemcpy3D(&p); - ASTRA_CUDA_ASSERT(err); - - // TODO: check errors - - return true; + return checkCuda(cudaMemcpy3D(&p), "transferHostProjectionsToArray 3D"); } -- cgit v1.2.3 From 39582115bc93b5435d25e56891815ae7cb1898fd Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Tue, 16 Nov 2021 13:44:13 +0100 Subject: Remove cudaTextForceKernelsCompletion --- cuda/3d/arith3d.cu | 36 ++++++++++++++++++------------------ cuda/3d/cone_bp.cu | 4 +++- cuda/3d/cone_fp.cu | 17 +++++++++-------- cuda/3d/fdk.cu | 6 ++++-- cuda/3d/par3d_bp.cu | 4 +++- cuda/3d/par3d_fp.cu | 30 ++++++++++++++---------------- cuda/3d/util3d.cu | 12 ------------ 7 files changed, 51 insertions(+), 58 deletions(-) (limited to 'cuda/3d') diff --git a/cuda/3d/arith3d.cu b/cuda/3d/arith3d.cu index fbaa50c..b495f22 100644 --- a/cuda/3d/arith3d.cu +++ b/cuda/3d/arith3d.cu @@ -225,7 +225,7 @@ void processVol(CUdeviceptr* out, unsigned int pitch, unsigned int width, unsign devtoD<<>>(pfOut, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template @@ -238,7 +238,7 @@ void processVol(CUdeviceptr* out, float fParam, unsigned int pitch, unsigned int devFtoD<<>>(pfOut, fParam, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template @@ -252,7 +252,7 @@ void processVol(CUdeviceptr* out, const CUdeviceptr* in, unsigned int pitch, uns devDtoD<<>>(pfOut, pfIn, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template @@ -266,7 +266,7 @@ void processVol(CUdeviceptr* out, const CUdeviceptr* in, float fParam, unsigned 
devDFtoD<<>>(pfOut, pfIn, fParam, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template @@ -281,7 +281,7 @@ void processVol(CUdeviceptr* out, const CUdeviceptr* in1, const CUdeviceptr* in2 devDDFtoD<<>>(pfOut, pfIn1, pfIn2, fParam, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template @@ -296,7 +296,7 @@ void processVol(CUdeviceptr* out, const CUdeviceptr* in1, const CUdeviceptr* in2 devDDtoD<<>>(pfOut, pfIn1, pfIn2, pitch, width, height); - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } @@ -328,7 +328,7 @@ void processVol3D(cudaPitchedPtr& out, const SDimensions3D& dims) pfOut += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template @@ -344,7 +344,7 @@ void processVol3D(cudaPitchedPtr& out, float fParam, const SDimensions3D& dims) pfOut += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template @@ -362,7 +362,7 @@ void processVol3D(cudaPitchedPtr& out, const cudaPitchedPtr& in, const SDimensio pfIn += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template @@ -380,7 +380,7 @@ void processVol3D(cudaPitchedPtr& out, const cudaPitchedPtr& in, float fParam, c pfIn += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template @@ -400,7 +400,7 @@ void processVol3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitc pfIn2 += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template @@ -420,7 +420,7 @@ void processVol3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitc pfIn2 += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } @@ -448,7 +448,7 @@ void processSino3D(cudaPitchedPtr& 
out, const SDimensions3D& dims) pfOut += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template @@ -464,7 +464,7 @@ void processSino3D(cudaPitchedPtr& out, float fParam, const SDimensions3D& dims) pfOut += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template @@ -482,7 +482,7 @@ void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in, const SDimensi pfIn += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template @@ -500,7 +500,7 @@ void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in, float fParam, pfIn += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template @@ -520,7 +520,7 @@ void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPit pfIn2 += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } template @@ -540,7 +540,7 @@ void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPit pfIn2 += step; } - cudaTextForceKernelsCompletion(); + checkCuda(cudaThreadSynchronize(), __FUNCTION__); } diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu index 7c3fc8d..e265304 100644 --- a/cuda/3d/cone_bp.cu +++ b/cuda/3d/cone_bp.cu @@ -357,7 +357,9 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData, dev_cone_BP_SS<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale); } - cudaTextForceKernelsCompletion(); + // TODO: Consider not synchronizing here, if possible. 
+ if (!checkCuda(cudaThreadSynchronize(), "cone_bp")) + return false; angles = angles + angleCount; // printf("%f\n", toc(t)); diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu index 4937d24..fede53b 100644 --- a/cuda/3d/cone_fp.cu +++ b/cuda/3d/cone_fp.cu @@ -402,8 +402,9 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData, dim3 dimGrid( ((dims.iProjU+g_detBlockU-1)/g_detBlockU)*((dims.iProjV+g_detBlockV-1)/g_detBlockV), (blockEnd-blockStart+g_anglesPerBlock-1)/g_anglesPerBlock); - // TODO: check if we can't immediately - // destroy the stream after use + + // TODO: consider limiting the number of streams, to handle (chaotic) geoms + // with many alternating directions cudaStream_t stream; cudaStreamCreate(&stream); streams.push_back(stream); @@ -446,16 +447,16 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData, } } - for (std::list::iterator iter = streams.begin(); iter != streams.end(); ++iter) - cudaStreamDestroy(*iter); - - streams.clear(); + bool ok = true; - cudaTextForceKernelsCompletion(); + for (std::list::iterator iter = streams.begin(); iter != streams.end(); ++iter) { + ok &= checkCuda(cudaStreamSynchronize(*iter), "cone_fp"); + cudaStreamDestroy(*iter); + } // printf("%f\n", toc(t)); - return true; + return ok; } diff --git a/cuda/3d/fdk.cu b/cuda/3d/fdk.cu index 7b36c93..0b8d2ab 100644 --- a/cuda/3d/fdk.cu +++ b/cuda/3d/fdk.cu @@ -176,7 +176,8 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData, devFDK_preweight<<>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fZShift, fDetUSize, fDetVSize, dims); - cudaTextForceKernelsCompletion(); + if (!checkCuda(cudaThreadSynchronize(), "FDK_PreWeight")) + return false; if (bShortScan && dims.iProjAngles > 1) { ASTRA_DEBUG("Doing Parker weighting"); @@ -225,9 +226,10 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData, devFDK_ParkerWeight<<>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fDetUSize, fCentralFanAngle, dims); + if (!checkCuda(cudaThreadSynchronize(), 
"FDK_PreWeight ParkerWeight")) + return false; } - cudaTextForceKernelsCompletion(); return true; } diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu index d356b9f..1dc75ce 100644 --- a/cuda/3d/par3d_bp.cu +++ b/cuda/3d/par3d_bp.cu @@ -291,7 +291,9 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData, dev_par3D_BP_SS<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale); } - cudaTextForceKernelsCompletion(); + // TODO: Consider not synchronizing here, if possible. + if (!checkCuda(cudaThreadSynchronize(), "par3d_bp")) + return false; angles = angles + angleCount; // printf("%f\n", toc(t)); diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index 1f58516..cf8336c 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -501,8 +501,8 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, dim3 dimGrid( ((dims.iProjU+g_detBlockU-1)/g_detBlockU)*((dims.iProjV+g_detBlockV-1)/g_detBlockV), (blockEnd-blockStart+g_anglesPerBlock-1)/g_anglesPerBlock); - // TODO: check if we can't immediately - // destroy the stream after use + // TODO: consider limiting the number of streams, to handle (chaotic) geoms + // with many alternating directions cudaStream_t stream; cudaStreamCreate(&stream); streams.push_back(stream); @@ -545,17 +545,16 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, } } - for (std::list::iterator iter = streams.begin(); iter != streams.end(); ++iter) - cudaStreamDestroy(*iter); - - streams.clear(); - - cudaTextForceKernelsCompletion(); + bool ok = true; + for (std::list::iterator iter = streams.begin(); iter != streams.end(); ++iter) { + ok &= checkCuda(cudaStreamSynchronize(*iter), "par3d_fp"); + cudaStreamDestroy(*iter); + } // printf("%f\n", toc(t)); - return true; + return ok; } bool Par3DFP(cudaPitchedPtr D_volumeData, @@ -726,17 +725,16 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, } } - for (std::list::iterator iter = streams.begin(); iter != streams.end(); ++iter) - 
cudaStreamDestroy(*iter); - - streams.clear(); - - cudaTextForceKernelsCompletion(); + bool ok = true; + for (std::list::iterator iter = streams.begin(); iter != streams.end(); ++iter) { + ok &= checkCuda(cudaStreamSynchronize(*iter), "Par3DFP_SumSqW"); + cudaStreamDestroy(*iter); + } // printf("%f\n", toc(t)); - return true; + return ok; } diff --git a/cuda/3d/util3d.cu b/cuda/3d/util3d.cu index 4f5d134..71b5668 100644 --- a/cuda/3d/util3d.cu +++ b/cuda/3d/util3d.cu @@ -387,18 +387,6 @@ float dotProduct3D(cudaPitchedPtr data, unsigned int x, unsigned int y, } -bool cudaTextForceKernelsCompletion() -{ - cudaError_t returnedCudaError = cudaThreadSynchronize(); - - if(returnedCudaError != cudaSuccess) { - ASTRA_ERROR("Failed to force completion of cuda kernels: %d: %s.", returnedCudaError, cudaGetErrorString(returnedCudaError)); - return false; - } - - return true; -} - int calcNextPowerOfTwo(int _iValue) { int iOutput = 1; -- cgit v1.2.3