diff options
| author | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2015-03-12 12:30:47 +0100 | 
|---|---|---|
| committer | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2015-03-12 12:30:47 +0100 | 
| commit | a70ad8df8fc2a3da63fc91dd18bbfd55be7a89dd (patch) | |
| tree | 09e13e27b69c254b5bd36d510ca700077bfb8c77 | |
| parent | 57ee6b85884b8226b26b7415ef151b4a6e63337c (diff) | |
| download | astra-a70ad8df8fc2a3da63fc91dd18bbfd55be7a89dd.tar.gz astra-a70ad8df8fc2a3da63fc91dd18bbfd55be7a89dd.tar.bz2 astra-a70ad8df8fc2a3da63fc91dd18bbfd55be7a89dd.tar.xz astra-a70ad8df8fc2a3da63fc91dd18bbfd55be7a89dd.zip  | |
Add outputScale argument to 3D CUDA BP
| -rw-r--r-- | cuda/3d/algo3d.cu | 7 | ||||
| -rw-r--r-- | cuda/3d/algo3d.h | 3 | ||||
| -rw-r--r-- | cuda/3d/astra3d.cu | 12 | ||||
| -rw-r--r-- | cuda/3d/cgls3d.cu | 4 | ||||
| -rw-r--r-- | cuda/3d/cone_bp.cu | 24 | ||||
| -rw-r--r-- | cuda/3d/cone_bp.h | 7 | ||||
| -rw-r--r-- | cuda/3d/par3d_bp.cu | 23 | ||||
| -rw-r--r-- | cuda/3d/par3d_bp.h | 6 | ||||
| -rw-r--r-- | cuda/3d/sirt3d.cu | 6 | 
9 files changed, 54 insertions, 38 deletions
diff --git a/cuda/3d/algo3d.cu b/cuda/3d/algo3d.cu index 7f61280..b775438 100644 --- a/cuda/3d/algo3d.cu +++ b/cuda/3d/algo3d.cu @@ -94,12 +94,13 @@ bool ReconAlgo3D::callFP(cudaPitchedPtr& D_volumeData,  }  bool ReconAlgo3D::callBP(cudaPitchedPtr& D_volumeData, -                       cudaPitchedPtr& D_projData) +                       cudaPitchedPtr& D_projData, +                       float outputScale)  {  	if (coneProjs) { -		return ConeBP(D_volumeData, D_projData, dims, coneProjs); +		return ConeBP(D_volumeData, D_projData, dims, coneProjs, outputScale);  	} else { -		return Par3DBP(D_volumeData, D_projData, dims, par3DProjs); +		return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, outputScale);  	}  } diff --git a/cuda/3d/algo3d.h b/cuda/3d/algo3d.h index f4c6a87..35ffc49 100644 --- a/cuda/3d/algo3d.h +++ b/cuda/3d/algo3d.h @@ -51,7 +51,8 @@ protected:  	            cudaPitchedPtr& D_projData,   	            float outputScale);  	bool callBP(cudaPitchedPtr& D_volumeData,  -	            cudaPitchedPtr& D_projData); +	            cudaPitchedPtr& D_projData, +	            float outputScale);  	SDimensions3D dims;  	SConeProjection* coneProjs; diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index b2375f3..7589416 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -1252,9 +1252,9 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections,  	}  	if (pParProjs) -		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs); +		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, 1.0f);  	else -		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs); +		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, 1.0f);  	ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); @@ -1330,9 +1330,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,  	processSino3D<opSet>(D_projData, 1.0f, dims);  	if (pParProjs) -		ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs); +		ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, 1.0f);  	else -		ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs); +		ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, 1.0f);  	processVol3D<opInvert>(D_pixelWeight, dims);  	if (!ok) { @@ -1347,9 +1347,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,  	ok &= zeroVolumeData(D_volumeData, dims);  	// Do BP into D_volumeData  	if (pParProjs) -		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs); +		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, 1.0f);  	else -		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs); +		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, 1.0f);  	// Multiply with weights  	processVol3D<opMul>(D_volumeData, D_pixelWeight, dims); diff --git a/cuda/3d/cgls3d.cu b/cuda/3d/cgls3d.cu index 5071a9b..4f632f3 100644 --- a/cuda/3d/cgls3d.cu +++ b/cuda/3d/cgls3d.cu @@ -165,7 +165,7 @@ bool CGLS::iterate(unsigned int iterations)  		// p = A'*r  		zeroVolumeData(D_p, dims); -		callBP(D_p, D_r); +		callBP(D_p, D_r, 1.0f);  		if (useVolumeMask)  			processVol3D<opMul>(D_p, D_maskData, dims); @@ -195,7 +195,7 @@ bool CGLS::iterate(unsigned int iterations)  		// z = A'*r  		zeroVolumeData(D_z, dims); -		callBP(D_z, D_r); +		callBP(D_z, D_r, 1.0f);  		if (useVolumeMask)  			processVol3D<opMul>(D_z, D_maskData, dims); diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu index 5648d6f..5e67980 100644 --- a/cuda/3d/cone_bp.cu +++ b/cuda/3d/cone_bp.cu @@ -78,7 +78,8 @@ bool bindProjDataTexture(const cudaArray* array)  //__launch_bounds__(32*16, 4)  __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle, -                            int angleOffset, const astraCUDA3d::SDimensions3D dims) +                            int angleOffset, const astraCUDA3d::SDimensions3D dims, +                            float fOutputScale)  {  	float* volData = (float*)D_volData; @@ -147,13 +148,13 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng  		endZ = dims.iVolZ - startZ;  	for(int i=0; i < endZ; i++) -		volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i]; +		volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i] * fOutputScale;  } //End kernel  // supersampling version -__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims) +__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)  {  	float* volData = (float*)D_volData; @@ -189,6 +190,9 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start  	float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;  	const float fSubStep = 1.0f/dims.iRaysPerVoxelDim; +	fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); + +  	for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f)  	{ @@ -236,14 +240,15 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start  		} -		volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal / (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); +		volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal * fOutputScale;  	}  }  bool ConeBP_Array(cudaPitchedPtr D_volumeData,                    cudaArray *D_projArray, -                  const SDimensions3D& dims, const SConeProjection* angles) +                  const SDimensions3D& dims, const SConeProjection* angles, +                  float fOutputScale)  {  	bindProjDataTexture(D_projArray); @@ -291,9 +296,9 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,  		for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {  		// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);   			if (dims.iRaysPerVoxelDim == 1) -				dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims); +				dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);  			else -				dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims); +				dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);  		}  		cudaTextForceKernelsCompletion(); @@ -309,14 +314,15 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,  bool ConeBP(cudaPitchedPtr D_volumeData,              cudaPitchedPtr D_projData, -            const SDimensions3D& dims, const SConeProjection* angles) +            const SDimensions3D& dims, const SConeProjection* angles, +            float fOutputScale)  {  	// transfer projections to array  	cudaArray* cuArray = allocateProjectionArray(dims);  	transferProjectionsToArray(D_projData, cuArray, dims); -	bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles); +	bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale);  	cudaFreeArray(cuArray); diff --git a/cuda/3d/cone_bp.h b/cuda/3d/cone_bp.h index cba6d9f..4d3d2dd 100644 --- a/cuda/3d/cone_bp.h +++ b/cuda/3d/cone_bp.h @@ -33,13 +33,14 @@ namespace astraCUDA3d {  _AstraExport bool ConeBP_Array(cudaPitchedPtr D_volumeData,                    cudaArray *D_projArray, -                  const SDimensions3D& dims, const SConeProjection* angles); +                  const SDimensions3D& dims, const SConeProjection* angles, +                  float fOutputScale);  _AstraExport bool ConeBP(cudaPitchedPtr D_volumeData,              cudaPitchedPtr D_projData, -            const SDimensions3D& dims, const SConeProjection* angles); +            const SDimensions3D& dims, const SConeProjection* angles, +            float fOutputScale); -  }  #endif diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu index 0c33280..1217949 100644 --- a/cuda/3d/par3d_bp.cu +++ b/cuda/3d/par3d_bp.cu @@ -77,7 +77,7 @@ static bool bindProjDataTexture(const cudaArray* array)  } -__global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims) +__global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)  {  	float* volData = (float*)D_volData; @@ -139,11 +139,11 @@ __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAn  		endZ = dims.iVolZ - startZ;  	for(int i=0; i < endZ; i++) -		volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i]; +		volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i] * fOutputScale;  }  // supersampling version -__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims) +__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)  {  	float* volData = (float*)D_volData; @@ -180,6 +180,9 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star  	const float fSubStep = 1.0f/dims.iRaysPerVoxelDim; +	fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); + +  	for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f)  	{ @@ -217,14 +220,15 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star  		} -		volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal / (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); +		volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal * fOutputScale;  	}  }  bool Par3DBP_Array(cudaPitchedPtr D_volumeData,                     cudaArray *D_projArray, -                   const SDimensions3D& dims, const SPar3DProjection* angles) +                   const SDimensions3D& dims, const SPar3DProjection* angles, +                   float fOutputScale)  {  	bindProjDataTexture(D_projArray); @@ -271,9 +275,9 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData,  		for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {  			// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);   			if (dims.iRaysPerVoxelDim == 1) -				dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims); +				dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);  			else -				dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims); +				dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);  		}  		cudaTextForceKernelsCompletion(); @@ -288,14 +292,15 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData,  bool Par3DBP(cudaPitchedPtr D_volumeData,              cudaPitchedPtr D_projData, -            const SDimensions3D& dims, const SPar3DProjection* angles) +            const SDimensions3D& dims, const SPar3DProjection* angles, +            float fOutputScale)  {  	// transfer projections to array  	cudaArray* cuArray = allocateProjectionArray(dims);  	transferProjectionsToArray(D_projData, cuArray, dims); -	bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles); +	bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale);  	cudaFreeArray(cuArray); diff --git a/cuda/3d/par3d_bp.h b/cuda/3d/par3d_bp.h index ece37d1..f1fc62d 100644 --- a/cuda/3d/par3d_bp.h +++ b/cuda/3d/par3d_bp.h @@ -33,11 +33,13 @@ namespace astraCUDA3d {  _AstraExport bool Par3DBP_Array(cudaPitchedPtr D_volumeData,                     cudaArray *D_projArray, -                   const SDimensions3D& dims, const SPar3DProjection* angles); +                   const SDimensions3D& dims, const SPar3DProjection* angles, +                   float fOutputScale);  _AstraExport bool Par3DBP(cudaPitchedPtr D_volumeData,               cudaPitchedPtr D_projData, -             const SDimensions3D& dims, const SPar3DProjection* angles); +             const SDimensions3D& dims, const SPar3DProjection* angles, +             float fOutputScale);  } diff --git a/cuda/3d/sirt3d.cu b/cuda/3d/sirt3d.cu index 389ee6b..0e6630a 100644 --- a/cuda/3d/sirt3d.cu +++ b/cuda/3d/sirt3d.cu @@ -160,10 +160,10 @@ bool SIRT::precomputeWeights()  	zeroVolumeData(D_pixelWeight, dims);  	if (useSinogramMask) { -		callBP(D_pixelWeight, D_smaskData); +		callBP(D_pixelWeight, D_smaskData, 1.0f);  	} else {  		processSino3D<opSet>(D_projData, 1.0f, dims); -		callBP(D_pixelWeight, D_projData); +		callBP(D_pixelWeight, D_projData, 1.0f);  	}  #if 0  	float* bufp = new float[512*512]; @@ -293,7 +293,7 @@ bool SIRT::iterate(unsigned int iterations)  #endif -		callBP(D_tmpData, D_projData); +		callBP(D_tmpData, D_projData, 1.0f);  #if 0  	printf("Dumping tmpData: %p\n", (void*)D_tmpData.ptr);  	float* buf = new float[256*256];  | 
