diff options
| -rw-r--r-- | cuda/2d/astra.cu | 2 | ||||
| -rw-r--r-- | cuda/2d/fft.cu | 2 | ||||
| -rw-r--r-- | cuda/3d/astra3d.cu | 80 | ||||
| -rw-r--r-- | cuda/3d/astra3d.h | 7 | ||||
| -rw-r--r-- | cuda/3d/cone_bp.cu | 12 | ||||
| -rw-r--r-- | cuda/3d/dims3d.h | 4 | ||||
| -rw-r--r-- | cuda/3d/fdk.cu | 250 | ||||
| -rw-r--r-- | cuda/3d/fdk.h | 8 | ||||
| -rw-r--r-- | cuda/3d/mem3d.cu | 28 | ||||
| -rw-r--r-- | cuda/3d/mem3d.h | 1 | ||||
| -rw-r--r-- | include/astra/CompositeGeometryManager.h | 9 | ||||
| -rw-r--r-- | src/CompositeGeometryManager.cpp | 50 | ||||
| -rw-r--r-- | src/CudaFDKAlgorithm3D.cpp | 9 | 
13 files changed, 208 insertions, 254 deletions
| diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu index 7317d69..b56ddf9 100644 --- a/cuda/2d/astra.cu +++ b/cuda/2d/astra.cu @@ -343,7 +343,7 @@ bool AstraFBP::run()  		dims3d.iProjV = 1;  		astraCUDA3d::FDK_PreWeight(tmp, pData->fOriginSourceDistance, -		              pData->fOriginDetectorDistance, 0.0f, 0.0f, +		              pData->fOriginDetectorDistance, 0.0f,  		              pData->dims.fDetScale, 1.0f, // TODO: Are these correct?  		              pData->bShortScan, dims3d, pData->angles);  	} diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index 2d259a9..3dd1c22 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -35,7 +35,7 @@ $Id$  #include <fstream>  #include "../../include/astra/Logging.h" -#include "astra/Fourier.h" +#include "../../include/astra/Fourier.h"  using namespace astra; diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index dd6d551..91f4530 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -1291,84 +1291,4 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, -bool astraCudaFDK(float* pfVolume, const float* pfProjections, -                  const CVolumeGeometry3D* pVolGeom, -                  const CConeProjectionGeometry3D* pProjGeom, -                  bool bShortScan, -                  int iGPUIndex, int iVoxelSuperSampling) -{ -	SDimensions3D dims; -	SProjectorParams3D params; - -	params.iRaysPerVoxelDim = iVoxelSuperSampling; - -	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); - -	// TODO: Check that pVolGeom is normalized, since we don't support -	// other volume geometries yet - -	if (!ok) -		return false; - - -	if (iVoxelSuperSampling == 0) -		return false; - -	if (iGPUIndex != -1) { -		cudaSetDevice(iGPUIndex); -		cudaError_t err = cudaGetLastError(); - -		// Ignore errors caused by calling cudaSetDevice multiple times -		if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) -			return false; -	} - -	cudaPitchedPtr D_volumeData = allocateVolumeData(dims); -	ok = D_volumeData.ptr; -	if (!ok) -		return false; - -	cudaPitchedPtr D_projData = allocateProjectionData(dims); -	ok = D_projData.ptr; -	if (!ok) { -		cudaFree(D_volumeData.ptr); -		return false; -	} - -	ok &= copyProjectionsToDevice(pfProjections, D_projData, dims, dims.iProjU); - -	ok &= zeroVolumeData(D_volumeData, dims); - -	if (!ok) { -		cudaFree(D_volumeData.ptr); -		cudaFree(D_projData.ptr); -		return false; -	} - -	float fOriginSourceDistance = pProjGeom->getOriginSourceDistance(); -	float fOriginDetectorDistance = pProjGeom->getOriginDetectorDistance(); -	float fDetUSize = pProjGeom->getDetectorSpacingX(); -	float fDetVSize = pProjGeom->getDetectorSpacingY(); -	const float *pfAngles = pProjGeom->getProjectionAngles(); - - -	// TODO: Offer interface for SrcZ, DetZ -	// TODO: Voxel scaling -	ok &= FDK(D_volumeData, D_projData, fOriginSourceDistance, -	          fOriginDetectorDistance, 0, 0, fDetUSize, fDetVSize, -	          dims, pfAngles, bShortScan); - -	ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); - - -	cudaFree(D_volumeData.ptr); -	cudaFree(D_projData.ptr); - -	return ok; - -} - - - -  } diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h index 3345ab8..93bf576 100644 --- a/cuda/3d/astra3d.h +++ b/cuda/3d/astra3d.h @@ -309,13 +309,6 @@ _AstraExport bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProje                        const CProjectionGeometry3D* pProjGeom,                        int iGPUIndex, int iVoxelSuperSampling); -_AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections, -                  const CVolumeGeometry3D* pVolGeom, -                  const CConeProjectionGeometry3D* pProjGeom, -                  bool bShortScan, -                  int iGPUIndex, int iVoxelSuperSampling); - -  } diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu index f077f0d..6bd9d16 100644 --- a/cuda/3d/cone_bp.cu +++ b/cuda/3d/cone_bp.cu @@ -77,6 +77,7 @@ bool bindProjDataTexture(const cudaArray* array)  //__launch_bounds__(32*16, 4) +template<bool FDKWEIGHT>  __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle,                              int angleOffset, const astraCUDA3d::SDimensions3D dims,                              float fOutputScale) @@ -134,7 +135,10 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng  				fU = fUNum * fr;  				fV = fVNum * fr;  				float fVal = tex3D(gT_coneProjTexture, fU, fAngle, fV); -				Z[idx] += fVal; // fr*fr*fVal; +				if (FDKWEIGHT) +					Z[idx] += fr*fr*fVal; +				else +					Z[idx] += fVal;  				fUNum += fCu.z;  				fVNum += fCv.z; @@ -297,8 +301,10 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,  		for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {  		// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);  -			if (params.iRaysPerVoxelDim == 1) -				dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); +			if (params.bFDKWeighting) +				dev_cone_BP<true><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); +			else if (params.iRaysPerVoxelDim == 1) +				dev_cone_BP<false><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);  			else  				dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale);  		} diff --git a/cuda/3d/dims3d.h b/cuda/3d/dims3d.h index a15c67a..eb7045f 100644 --- a/cuda/3d/dims3d.h +++ b/cuda/3d/dims3d.h @@ -58,7 +58,8 @@ struct SProjectorParams3D {  	    iRaysPerDetDim(1), iRaysPerVoxelDim(1),  	    fOutputScale(1.0f),  	    fVolScaleX(1.0f), fVolScaleY(1.0f), fVolScaleZ(1.0f), -	    ker(ker3d_default) +	    ker(ker3d_default), +	    bFDKWeighting(false)  	{ }  	unsigned int iRaysPerDetDim; @@ -68,6 +69,7 @@ struct SProjectorParams3D {  	float fVolScaleY;  	float fVolScaleZ;  	Cuda3DProjectionKernel ker; +	bool bFDKWeighting;  };  } diff --git a/cuda/3d/fdk.cu b/cuda/3d/fdk.cu index 0e13be1..d847eee 100644 --- a/cuda/3d/fdk.cu +++ b/cuda/3d/fdk.cu @@ -41,8 +41,11 @@ $Id$  #include "dims3d.h"  #include "arith3d.h" +#include "cone_bp.h"  #include "../2d/fft.h" +#include "../../include/astra/Logging.h" +  typedef texture<float, 3, cudaReadModeElementType> texture3D;  static texture3D gT_coneProjTexture; @@ -86,129 +89,7 @@ static bool bindProjDataTexture(const cudaArray* array)  } -__global__ void devBP_FDK(void* D_volData, unsigned int volPitch, int startAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fInvDetUSize, float fInvDetVSize, const SDimensions3D dims) -{ -	float* volData = (float*)D_volData; - -	int endAngle = startAngle + g_anglesPerBlock; -	if (endAngle > dims.iProjAngles) -		endAngle = dims.iProjAngles; - -	// threadIdx: x = rel x -	//            y = rel y - -	// blockIdx:  x = x + y -    //            y = z - - -	// TO TRY: precompute part of detector intersection formulas in shared mem? -	// TO TRY: inner loop over z, gather ray values in shared mem - -	const int X = blockIdx.x % ((dims.iVolX+g_volBlockX-1)/g_volBlockX) * g_volBlockX + threadIdx.x; -	const int Y = blockIdx.x / ((dims.iVolX+g_volBlockX-1)/g_volBlockX) * g_volBlockY + threadIdx.y; - -	if (X > dims.iVolX) -		return; -	if (Y > dims.iVolY) -		return; - -	const int startZ = blockIdx.y * g_volBlockZ; -	int endZ = startZ + g_volBlockZ; -	if (endZ > dims.iVolZ) -		endZ = dims.iVolZ; - -	float fX = X - 0.5f*dims.iVolX + 0.5f; -	float fY = Y - 0.5f*dims.iVolY + 0.5f; -	float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - fSrcZ; - -	const float fU_base = 0.5f*dims.iProjU - 0.5f + 0.5f; -	const float fV_base = 0.5f*dims.iProjV - 0.5f + 0.5f + (fDetZ-fSrcZ); - -	// Note re. fZ/rV_base: the computations below are all relative to the -	// optical axis, so we do the Z-adjustments beforehand. - -	for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f) -	{ - -		float fVal = 0.0f; -		float fAngle = startAngle + 0.5f; - -		for (int angle = startAngle; angle < endAngle; ++angle, fAngle += 1.0f) -		{ - -			const float cos_theta = gC_angle_cos[angle]; -			const float sin_theta = gC_angle_sin[angle]; - -			const float fR = fSrcOrigin; -			const float fD = fR - fX * sin_theta + fY * cos_theta; -			float fWeight = fR / fD; -			fWeight *= fWeight; - -			const float fScaleFactor = (fR + fDetOrigin) / fD; -			const float fU = fU_base + (fX*cos_theta+fY*sin_theta) * fScaleFactor * fInvDetUSize; -			const float fV = fV_base + fZ * fScaleFactor * fInvDetVSize; - -			fVal += tex3D(gT_coneProjTexture, fU, fAngle, fV); - -		} - -		volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal; -//		projData[(angle*dims.iProjV+detectorV)*projPitch+detectorU] = 10.0f; -//		if (threadIdx.x == 0 && threadIdx.y == 0) { printf("%d,%d,%d [%d / %d] -> %f\n", angle, detectorU, detectorV, (angle*dims.iProjV+detectorV)*projPitch+detectorU, projPitch, projData[(angle*dims.iProjV+detectorV)*projPitch+detectorU]); } -	} - -} - - -bool FDK_BP(cudaPitchedPtr D_volumeData, -            cudaPitchedPtr D_projData, -            float fSrcOrigin, float fDetOrigin, -            float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, -            const SDimensions3D& dims, const float* angles) -{ -	// transfer projections to array - -	cudaArray* cuArray = allocateProjectionArray(dims); -	transferProjectionsToArray(D_projData, cuArray, dims); - -	bindProjDataTexture(cuArray); - -	float* angle_sin = new float[dims.iProjAngles]; -	float* angle_cos = new float[dims.iProjAngles]; - -	for (unsigned int i = 0; i < dims.iProjAngles; ++i) { -		angle_sin[i] = sinf(angles[i]); -		angle_cos[i] = cosf(angles[i]); -	} -	cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_sin, angle_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -	cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_cos, angle_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -	assert(e1 == cudaSuccess); -	assert(e2 == cudaSuccess); - -	delete[] angle_sin; -	delete[] angle_cos; - -	dim3 dimBlock(g_volBlockX, g_volBlockY); - -	dim3 dimGrid(((dims.iVolX+g_volBlockX-1)/g_volBlockX)*((dims.iVolY+g_volBlockY-1)/g_volBlockY), (dims.iVolZ+g_volBlockZ-1)/g_volBlockZ); - -	// timeval t; -	// tic(t); - -	for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { -		devBP_FDK<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, 1.0f / fDetUSize, 1.0f / fDetVSize, dims); -	} - -	cudaTextForceKernelsCompletion(); - -	cudaFreeArray(cuArray); - -	// printf("%f\n", toc(t)); - -	return true; -} - -__global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, const SDimensions3D dims) +__global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fZShift, float fDetUSize, float fDetVSize, const SDimensions3D dims)  {  	float* projData = (float*)D_projData;  	int angle = startAngle + blockIdx.y * g_anglesPerWeightBlock + threadIdx.y; @@ -230,21 +111,26 @@ __global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsig  	const float fT = fCentralRayLength * fCentralRayLength + fU * fU; -	float fV = (startDetectorV - 0.5f*dims.iProjV + 0.5f) * fDetVSize + fDetZ - fSrcZ; +	float fV = (startDetectorV - 0.5f*dims.iProjV + 0.5f) * fDetVSize + fZShift; + +	//const float fW = fCentralRayLength; +	//const float fW = fCentralRayLength * (M_PI / 2.0f) / (float)dims.iProjAngles; +	const float fW1 = fSrcOrigin * fDetUSize * fDetVSize; +	const float fW = fCentralRayLength * fW1 * fW1 * (M_PI / 2.0f) / (float)dims.iProjAngles;  	for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV)  	{  		const float fRayLength = sqrtf(fT + fV * fV); -		const float fWeight = fCentralRayLength / fRayLength; +		const float fWeight = fW / fRayLength;  		projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] *= fWeight; -		fV += 1.0f; +		fV += fDetVSize;  	}  } -__global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fDetUSize, float fCentralFanAngle, const SDimensions3D dims) +__global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fDetUSize, float fCentralFanAngle, const SDimensions3D dims)  {  	float* projData = (float*)D_projData;  	int angle = startAngle + blockIdx.y * g_anglesPerWeightBlock + threadIdx.y; @@ -305,7 +191,7 @@ __global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, un  // Perform the FDK pre-weighting and filtering  bool FDK_PreWeight(cudaPitchedPtr D_projData,                  float fSrcOrigin, float fDetOrigin, -                float fSrcZ, float fDetZ, +                float fZShift,                  float fDetUSize, float fDetVSize, bool bShortScan,                  const SDimensions3D& dims, const float* angles)  { @@ -318,23 +204,57 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData,  	int projPitch = D_projData.pitch/sizeof(float); -	devFDK_preweight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fDetVSize, dims); +	devFDK_preweight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fZShift, fDetUSize, fDetVSize, dims);  	cudaTextForceKernelsCompletion(); -	if (bShortScan) { +	if (bShortScan && dims.iProjAngles > 1) { +		ASTRA_DEBUG("Doing Parker weighting");  		// We do short-scan Parker weighting -		cudaError_t e1 = cudaMemcpyToSymbol(gC_angle, angles, +		// First, determine (in a very basic way) the interval that's +		// been scanned. We assume angles[0] is one of the endpoints of the +		// range. +		float fdA = angles[1] - angles[0]; + +		while (fdA < -M_PI) +			fdA += 2*M_PI; +		while (fdA >= M_PI) +			fdA -= 2*M_PI; + +		float fAngleBase; +		if (fdA >= 0.0f) { +			// going up from angles[0] +			fAngleBase = angles[dims.iProjAngles - 1]; +		} else { +			// going down from angles[0] +			fAngleBase = angles[0]; +		} + +		// We pick the highest end of the range, and then +		// move all angles so they fall in (-2pi,0] + +		float *fRelAngles = new float[dims.iProjAngles]; +		for (unsigned int i = 0; i < dims.iProjAngles; ++i) { +			float f = angles[i] - fAngleBase; +			while (f > 0) +				f -= 2*M_PI; +			while (f <= -2*M_PI) +				f += 2*M_PI; +			fRelAngles[i] = f; + +		} + +		cudaError_t e1 = cudaMemcpyToSymbol(gC_angle, fRelAngles,  		                                    dims.iProjAngles*sizeof(float), 0,  		                                    cudaMemcpyHostToDevice);  		assert(!e1); +		delete[] fRelAngles; -		// TODO: detector pixel size! -		float fCentralFanAngle = atanf((dims.iProjU*0.5f) / +		float fCentralFanAngle = atanf(fDetUSize * (dims.iProjU*0.5f) /  		                               (fSrcOrigin + fDetOrigin)); -		devFDK_ParkerWeight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fCentralFanAngle, dims); +		devFDK_ParkerWeight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fDetUSize, fCentralFanAngle, dims);  	} @@ -344,10 +264,7 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData,  bool FDK_Filter(cudaPitchedPtr D_projData,                  cufftComplex * D_filter, -                float fSrcOrigin, float fDetOrigin, -                float fSrcZ, float fDetZ, -                float fDetUSize, float fDetVSize, bool bShortScan, -                const SDimensions3D& dims, const float* angles) +                const SDimensions3D& dims)  {  	// The filtering is a regular ramp filter per detector line. @@ -392,9 +309,8 @@ bool FDK_Filter(cudaPitchedPtr D_projData,  bool FDK(cudaPitchedPtr D_volumeData,           cudaPitchedPtr D_projData, -         float fSrcOrigin, float fDetOrigin, -         float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, -         const SDimensions3D& dims, const float* angles, bool bShortScan) +         const SConeProjection* angles, +         const SDimensions3D& dims, SProjectorParams3D params, bool bShortScan)  {  	bool ok;  	// Generate filter @@ -403,12 +319,45 @@ bool FDK(cudaPitchedPtr D_volumeData,  	int iPaddedDetCount = calcNextPowerOfTwo(2 * dims.iProjU);  	int iHalfFFTSize = calcFFTFourSize(iPaddedDetCount); + +	// NB: We don't support arbitrary cone_vec geometries here. +	// Only those that are vertical sub-geometries +	// (cf. CompositeGeometryManager) of regular cone geometries. +	assert(dims.iProjAngles > 0); +	const SConeProjection& p0 = angles[0]; + +	// assuming U is in the XY plane, V is parallel to Z axis +	float fDetCX = p0.fDetSX + 0.5*dims.iProjU*p0.fDetUX; +	float fDetCY = p0.fDetSY + 0.5*dims.iProjU*p0.fDetUY; +	float fDetCZ = p0.fDetSZ + 0.5*dims.iProjV*p0.fDetVZ; + +	float fSrcOrigin = sqrt(p0.fSrcX*p0.fSrcX + p0.fSrcY*p0.fSrcY); +	float fDetOrigin = sqrt(fDetCX*fDetCX + fDetCY*fDetCY); +	float fDetUSize = sqrt(p0.fDetUX*p0.fDetUX + p0.fDetUY*p0.fDetUY); +	float fDetVSize = abs(p0.fDetVZ); + +	float fZShift = fDetCZ - p0.fSrcZ; + +	float *pfAngles = new float[dims.iProjAngles]; +	for (unsigned int i = 0; i < dims.iProjAngles; ++i) { +		// FIXME: Sign/order +		pfAngles[i] = -atan2(angles[i].fSrcX, angles[i].fSrcY) + M_PI; +	} + + +#if 1  	ok = FDK_PreWeight(D_projData, fSrcOrigin, fDetOrigin, -	                fSrcZ, fDetZ, fDetUSize, fDetVSize, -	                bShortScan, dims, angles); +	                fZShift, fDetUSize, fDetVSize, +	                bShortScan, dims, pfAngles); +#else +	ok = true; +#endif +	delete[] pfAngles; +  	if (!ok)  		return false; +#if 1  	cufftComplex *pHostFilter = new cufftComplex[dims.iProjAngles * iHalfFFTSize];  	memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iHalfFFTSize); @@ -425,28 +374,25 @@ bool FDK(cudaPitchedPtr D_volumeData, -	ok = FDK_Filter(D_projData, D_filter, fSrcOrigin, fDetOrigin, -	                fSrcZ, fDetZ, fDetUSize, fDetVSize, -	                bShortScan, dims, angles); +	ok = FDK_Filter(D_projData, D_filter, dims);  	// Clean up filter  	freeComplexOnDevice(D_filter); - +#endif  	if (!ok)  		return false;  	// Perform BP -	ok = FDK_BP(D_volumeData, D_projData, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, -	            fDetUSize, fDetVSize, dims, angles); +	params.bFDKWeighting = true; + +	//ok = FDK_BP(D_volumeData, D_projData, fSrcOrigin, fDetOrigin, 0.0f, 0.0f, fDetUSize, fDetVSize, dims, pfAngles); +	ok = ConeBP(D_volumeData, D_projData, dims, angles, params);  	if (!ok)  		return false; -	processVol3D<opMul>(D_volumeData, -	                  (M_PI / 2.0f) / (float)dims.iProjAngles, dims); -  	return true;  } diff --git a/cuda/3d/fdk.h b/cuda/3d/fdk.h index de7fe53..8d2a128 100644 --- a/cuda/3d/fdk.h +++ b/cuda/3d/fdk.h @@ -35,16 +35,14 @@ namespace astraCUDA3d {  bool FDK_PreWeight(cudaPitchedPtr D_projData,                  float fSrcOrigin, float fDetOrigin, -                float fSrcZ, float fDetZ, +                float fZShift,                  float fDetUSize, float fDetVSize, bool bShortScan,                  const SDimensions3D& dims, const float* angles);  bool FDK(cudaPitchedPtr D_volumeData,           cudaPitchedPtr D_projData, -         float fSrcOrigin, float fDetOrigin, -         float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, -         const SDimensions3D& dims, const float* angles, bool bShortScan); - +         const SConeProjection* angles, +         const SDimensions3D& dims, SProjectorParams3D params, bool bShortScan);  } diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu index 9a239da..4fb30a2 100644 --- a/cuda/3d/mem3d.cu +++ b/cuda/3d/mem3d.cu @@ -38,6 +38,7 @@ $Id$  #include "cone_bp.h"  #include "par3d_fp.h"  #include "par3d_bp.h" +#include "fdk.h"  #include "astra/Logging.h" @@ -278,6 +279,33 @@ bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con  } +bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan) +{ +	SDimensions3D dims; +	SProjectorParams3D params; + +	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); +	if (!ok) +		return false; + +	SPar3DProjection* pParProjs; +	SConeProjection* pConeProjs; + +	ok = convertAstraGeometry(pVolGeom, pProjGeom, +	                          pParProjs, pConeProjs, +	                          params); + +	if (!ok || !pConeProjs) +		return false; + +	ok &= FDK(volData.d->ptr, projData.d->ptr, pConeProjs, dims, params, bShortScan); + +	return ok; + + + +} + diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h index 6fff80b..2d0fb27 100644 --- a/cuda/3d/mem3d.h +++ b/cuda/3d/mem3d.h @@ -95,6 +95,7 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con  bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling); +bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan);  } diff --git a/include/astra/CompositeGeometryManager.h b/include/astra/CompositeGeometryManager.h index 18dd72f..064370a 100644 --- a/include/astra/CompositeGeometryManager.h +++ b/include/astra/CompositeGeometryManager.h @@ -55,6 +55,10 @@ struct SGPUParams {  	size_t memory;  }; +struct SFDKSettings { +	bool bShortScan; +}; +  class _AstraExport CCompositeGeometryManager {  public: @@ -127,9 +131,10 @@ public:  		CProjector3D *pProjector; // For a `global' geometry. It will not match  		                          // the geometries of the input and output. +		SFDKSettings FDKSettings;  		enum { -			JOB_FP, JOB_BP, JOB_NOP +			JOB_FP, JOB_BP, JOB_FDK, JOB_NOP  		} eType;  		enum {  			MODE_ADD, MODE_SET @@ -155,6 +160,8 @@ public:  	          CFloat32ProjectionData3DMemory *pProjData);  	bool doBP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,  	          CFloat32ProjectionData3DMemory *pProjData); +	bool doFDK(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, +	          CFloat32ProjectionData3DMemory *pProjData, bool bShortScan);  	bool doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData);  	bool doBP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData); diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp index c63faaa..7c4f8e6 100644 --- a/src/CompositeGeometryManager.cpp +++ b/src/CompositeGeometryManager.cpp @@ -146,6 +146,7 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div  				newjob.eType = j->eType;  				newjob.eMode = j->eMode;  				newjob.pProjector = j->pProjector; +				newjob.FDKSettings = j->FDKSettings;  				CPart* input = job.pInput->reduce(outputPart.get()); @@ -1039,6 +1040,25 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeDat  	return doJobs(L);  } + +bool CCompositeGeometryManager::doFDK(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, +                                     CFloat32ProjectionData3DMemory *pProjData, bool bShortScan) +{ +	if (!dynamic_cast<CConeProjectionGeometry3D*>(pProjData->getGeometry())) { +		ASTRA_ERROR("CCompositeGeometryManager::doFDK: cone geometry required"); +		return false; +	} + +	SJob job = createJobBP(pProjector, pVolData, pProjData); +	job.eType = SJob::JOB_FDK; +	job.FDKSettings.bShortScan = bShortScan; + +	TJobList L; +	L.push_back(job); + +	return doJobs(L); +} +  bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData)  {  	ASTRA_DEBUG("CCompositeGeometryManager::doFP, multi-volume"); @@ -1232,7 +1252,9 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter  		ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims);  		if (!ok) ASTRA_ERROR("Error copying input data to GPU"); -		if (j.eType == CCompositeGeometryManager::SJob::JOB_FP) { +		switch (j.eType) { +		case CCompositeGeometryManager::SJob::JOB_FP: +		{  			assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pInput.get()));  			assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pOutput.get())); @@ -1241,7 +1263,10 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter  			ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel);  			if (!ok) ASTRA_ERROR("Error performing sub-FP");  			ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done"); -		} else if (j.eType == CCompositeGeometryManager::SJob::JOB_BP) { +		} +		break; +		case CCompositeGeometryManager::SJob::JOB_BP: +		{  			assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get()));  			assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get())); @@ -1250,7 +1275,26 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter  			ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling);  			if (!ok) ASTRA_ERROR("Error performing sub-BP");  			ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done"); -		} else { +		} +		break; +		case CCompositeGeometryManager::SJob::JOB_FDK: +		{ +			assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get())); +			assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get())); + +			if (srcdims.subx || srcdims.suby) { +				ASTRA_ERROR("CCompositeGeometryManager::doJobs: data too large for FDK"); +				ok = false; +			} else { +				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FDK"); + +				ok = astraCUDA3d::FDK(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, j.FDKSettings.bShortScan); +				if (!ok) ASTRA_ERROR("Error performing sub-FDK"); +				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FDK done"); +			} +		} +		break; +		default:  			assert(false);  		} diff --git a/src/CudaFDKAlgorithm3D.cpp b/src/CudaFDKAlgorithm3D.cpp index e101a42..c7c8ed5 100644 --- a/src/CudaFDKAlgorithm3D.cpp +++ b/src/CudaFDKAlgorithm3D.cpp @@ -32,6 +32,7 @@ $Id$  #include "astra/CudaProjector3D.h"  #include "astra/ConeProjectionGeometry3D.h" +#include "astra/CompositeGeometryManager.h"  #include "astra/Logging.h" @@ -206,6 +207,7 @@ void CCudaFDKAlgorithm3D::run(int _iNrIterations)  	ASTRA_ASSERT(pReconMem); +#if 0  	bool ok = true;  	ok = astraCudaFDK(pReconMem->getData(), pSinoMem->getDataConst(), @@ -213,6 +215,13 @@ void CCudaFDKAlgorithm3D::run(int _iNrIterations)  	                  m_bShortScan, m_iGPUIndex, m_iVoxelSuperSampling);  	ASTRA_ASSERT(ok); +#endif + +	CCompositeGeometryManager cgm; + +	cgm.doFDK(m_pProjector, pReconMem, pSinoMem, m_bShortScan); + +  }  //---------------------------------------------------------------------------------------- | 
