From 1dd79f23f783564719a52de7d9b54b17005c32d7 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Fri, 2 May 2014 09:20:54 +0000 Subject: Add SIRT-Weighted BP3D (par3d-only) for use in large BP --- cuda/3d/astra3d.cu | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 139 insertions(+), 3 deletions(-) (limited to 'cuda/3d/astra3d.cu') diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 4447775..2efac98 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -1463,6 +1463,40 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, return ok; } +// This computes the column weights, divides by them, and adds the +// result to the current volume. This is both more expensive and more +// GPU memory intensive than the regular BP, but allows saving system RAM. +bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections, + unsigned int iVolX, + unsigned int iVolY, + unsigned int iVolZ, + unsigned int iProjAngles, + unsigned int iProjU, + unsigned int iProjV, + float fDetUSize, + float fDetVSize, + const float *pfAngles, + int iGPUIndex, int iVoxelSuperSampling) +{ + if (iVolX == 0 || iVolY == 0 || iVolZ == 0) + return false; + if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) + return false; + + SPar3DProjection* p = genPar3DProjections(iProjAngles, + iProjU, iProjV, + fDetUSize, fDetVSize, + pfAngles); + + bool ok; + ok = astraCudaPar3DBP_SIRTWeighted(pfVolume, pfProjections, iVolX, iVolY, iVolZ, + iProjAngles, iProjU, iProjV, p, iGPUIndex, iVoxelSuperSampling); + + delete[] p; + + return ok; +} + bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, unsigned int iVolX, @@ -1491,9 +1525,6 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, dims.iRaysPerVoxelDim = iVoxelSuperSampling; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - if (iGPUIndex != -1) { cudaSetDevice(iGPUIndex); cudaError_t err = cudaGetLastError(); @@ -1540,6 +1571,111 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, } +// This computes the column weights, divides by them, and adds the +// result to the current volume. This is both more expensive and more +// GPU memory intensive than the regular BP, but allows saving system RAM. +bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, + const float* pfProjections, + unsigned int iVolX, + unsigned int iVolY, + unsigned int iVolZ, + unsigned int iProjAngles, + unsigned int iProjU, + unsigned int iProjV, + const SPar3DProjection *pfAngles, + int iGPUIndex, int iVoxelSuperSampling) +{ + SDimensions3D dims; + + dims.iVolX = iVolX; + dims.iVolY = iVolY; + dims.iVolZ = iVolZ; + if (iVolX == 0 || iVolY == 0 || iVolZ == 0) + return false; + + dims.iProjAngles = iProjAngles; + dims.iProjU = iProjU; + dims.iProjV = iProjV; + + if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) + return false; + + dims.iRaysPerVoxelDim = iVoxelSuperSampling; + + if (iGPUIndex != -1) { + cudaSetDevice(iGPUIndex); + cudaError_t err = cudaGetLastError(); + + // Ignore errors caused by calling cudaSetDevice multiple times + if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) + return false; + } + + + cudaPitchedPtr D_pixelWeight = allocateVolumeData(dims); + bool ok = D_pixelWeight.ptr; + if (!ok) + return false; + + cudaPitchedPtr D_volumeData = allocateVolumeData(dims); + ok = D_volumeData.ptr; + if (!ok) { + cudaFree(D_pixelWeight.ptr); + return false; + } + + cudaPitchedPtr D_projData = allocateProjectionData(dims); + ok = D_projData.ptr; + if (!ok) { + cudaFree(D_pixelWeight.ptr); + cudaFree(D_volumeData.ptr); + return false; + } + + // Compute weights + ok &= zeroVolumeData(D_pixelWeight, dims); + processSino3D(D_projData, 1.0f, dims); + ok &= Par3DBP(D_pixelWeight, D_projData, dims, pfAngles); + processVol3D(D_pixelWeight, dims); + if (!ok) { + cudaFree(D_pixelWeight.ptr); + cudaFree(D_volumeData.ptr); + cudaFree(D_projData.ptr); + return false; + } + + ok &= copyProjectionsToDevice(pfProjections, D_projData, + dims, dims.iProjU); + ok &= zeroVolumeData(D_volumeData, dims); + // Do BP into D_volumeData + ok &= Par3DBP(D_volumeData, D_projData, dims, pfAngles); + // Multiply with weights + processVol3D(D_volumeData, D_pixelWeight, dims); + + // Upload previous iterate to D_pixelWeight... + ok &= copyVolumeToDevice(pfVolume, D_pixelWeight, dims, dims.iVolX); + if (!ok) { + cudaFree(D_pixelWeight.ptr); + cudaFree(D_volumeData.ptr); + cudaFree(D_projData.ptr); + return false; + } + // ...and add it to the weighted BP + processVol3D(D_volumeData, D_pixelWeight, dims); + + // Then copy the result back + ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); + + + cudaFree(D_pixelWeight.ptr); + cudaFree(D_volumeData.ptr); + cudaFree(D_projData.ptr); + + return ok; + +} + + bool astraCudaFDK(float* pfVolume, const float* pfProjections, unsigned int iVolX, -- cgit v1.2.3