diff options
| -rw-r--r-- | cuda/2d/astra.cu | 3 | ||||
| -rw-r--r-- | cuda/2d/fft.cu | 40 | ||||
| -rw-r--r-- | cuda/3d/algo3d.cu | 21 | ||||
| -rw-r--r-- | cuda/3d/algo3d.h | 5 | ||||
| -rw-r--r-- | cuda/3d/astra3d.cu | 213 | ||||
| -rw-r--r-- | cuda/3d/astra3d.h | 18 | ||||
| -rw-r--r-- | cuda/3d/cgls3d.cu | 2 | ||||
| -rw-r--r-- | cuda/3d/cone_bp.cu | 40 | ||||
| -rw-r--r-- | cuda/3d/cone_bp.h | 4 | ||||
| -rw-r--r-- | cuda/3d/cone_fp.cu | 95 | ||||
| -rw-r--r-- | cuda/3d/cone_fp.h | 4 | ||||
| -rw-r--r-- | cuda/3d/dims3d.h | 24 | ||||
| -rw-r--r-- | cuda/3d/fdk.cu | 250 | ||||
| -rw-r--r-- | cuda/3d/fdk.h | 8 | ||||
| -rw-r--r-- | cuda/3d/mem3d.cu | 55 | ||||
| -rw-r--r-- | cuda/3d/mem3d.h | 1 | ||||
| -rw-r--r-- | cuda/3d/par3d_bp.cu | 30 | ||||
| -rw-r--r-- | cuda/3d/par3d_bp.h | 4 | ||||
| -rw-r--r-- | cuda/3d/par3d_fp.cu | 156 | ||||
| -rw-r--r-- | cuda/3d/par3d_fp.h | 6 | ||||
| -rw-r--r-- | cuda/3d/sirt3d.cu | 2 | ||||
| -rw-r--r-- | include/astra/CompositeGeometryManager.h | 9 | ||||
| -rw-r--r-- | include/astra/GeometryUtil3D.h | 52 | ||||
| -rw-r--r-- | src/CompositeGeometryManager.cpp | 50 | ||||
| -rw-r--r-- | src/CudaFDKAlgorithm3D.cpp | 20 | 
25 files changed, 627 insertions, 485 deletions
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu index 4c69628..b56ddf9 100644 --- a/cuda/2d/astra.cu +++ b/cuda/2d/astra.cu @@ -341,10 +341,9 @@ bool AstraFBP::run()  		dims3d.iProjAngles = pData->dims.iProjAngles;  		dims3d.iProjU = pData->dims.iProjDets;  		dims3d.iProjV = 1; -		dims3d.iRaysPerDetDim = dims3d.iRaysPerVoxelDim = 1;  		astraCUDA3d::FDK_PreWeight(tmp, pData->fOriginSourceDistance, -		              pData->fOriginDetectorDistance, 0.0f, 0.0f, +		              pData->fOriginDetectorDistance, 0.0f,  		              pData->dims.fDetScale, 1.0f, // TODO: Are these correct?  		              pData->bShortScan, dims3d, pData->angles);  	} diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index 2bfd493..aab755a 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -35,6 +35,7 @@ $Id$  #include <fstream>  #include "../../include/astra/Logging.h" +#include "../../include/astra/Fourier.h"  using namespace astra; @@ -303,6 +304,8 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,  	float * pfFilt = new float[_iFFTFourierDetectorCount];  	float * pfW = new float[_iFFTFourierDetectorCount]; +#if 1 +  	for(int iDetectorIndex = 0; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++)  	{  		float fRelIndex = (float)iDetectorIndex / (float)_iFFTRealDetectorCount; @@ -314,6 +317,43 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,  		// w = 2*pi*(0:size(filt,2)-1)/order  		pfW[iDetectorIndex] = 3.1415f * 2.0f * fRelIndex;  	} +#else + +	float *pfData = new float[2*_iFFTRealDetectorCount]; +	int *ip = new int[int(2+sqrt(_iFFTRealDetectorCount)+1)]; +	ip[0]=0; +	float32 *w = new float32[_iFFTRealDetectorCount/2]; + +	for (int i = 0; i < _iFFTRealDetectorCount; ++i) { +		pfData[2*i+1] = 0.0f; + +		if (i & 1) { +			int j = i; +			if (2*j > _iFFTRealDetectorCount) +				j = _iFFTRealDetectorCount - j; +			float f = M_PI * j; +			pfData[2*i] = -1 / (f*f); +		} else { +			pfData[2*i] = 0.0f; +		} +	} + +	pfData[0] = 0.25f; + +	cdft(2*_iFFTRealDetectorCount, -1, pfData, ip, w); + +	for(int iDetectorIndex = 0; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++) +	{ +		float fRelIndex = (float)iDetectorIndex / (float)_iFFTRealDetectorCount; + +		pfFilt[iDetectorIndex] = 2.0f * pfData[2*iDetectorIndex]; +		pfW[iDetectorIndex] = M_PI * 2.0f * fRelIndex; +	} + +	delete[] pfData; +	delete[] ip; +	delete[] w; +#endif  	switch(_eFilter)  	{ diff --git a/cuda/3d/algo3d.cu b/cuda/3d/algo3d.cu index cc86b70..e2c1e43 100644 --- a/cuda/3d/algo3d.cu +++ b/cuda/3d/algo3d.cu @@ -41,7 +41,6 @@ ReconAlgo3D::ReconAlgo3D()  	coneProjs = 0;  	par3DProjs = 0;  	shouldAbort = false; -	fOutputScale = 1.0f;  }  ReconAlgo3D::~ReconAlgo3D() @@ -58,10 +57,10 @@ void ReconAlgo3D::reset()  	shouldAbort = false;  } -bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles, float _outputScale) +bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles, const SProjectorParams3D& _params)  {  	dims = _dims; -	fOutputScale = _outputScale; +	params = _params;  	coneProjs = new SConeProjection[dims.iProjAngles];  	par3DProjs = 0; @@ -71,10 +70,10 @@ bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProject  	return true;  } -bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles, float _outputScale) +bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles, const SProjectorParams3D& _params)  {  	dims = _dims; -	fOutputScale = _outputScale; +	params = _params;  	par3DProjs = new SPar3DProjection[dims.iProjAngles];  	coneProjs = 0; @@ -89,10 +88,12 @@ bool ReconAlgo3D::callFP(cudaPitchedPtr& D_volumeData,                         cudaPitchedPtr& D_projData,                         float outputScale)  { +	SProjectorParams3D p = params; +	p.fOutputScale *= outputScale;  	if (coneProjs) { -		return ConeFP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale); +		return ConeFP(D_volumeData, D_projData, dims, coneProjs, p);  	} else { -		return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale); +		return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, p);  	}  } @@ -100,10 +101,12 @@ bool ReconAlgo3D::callBP(cudaPitchedPtr& D_volumeData,                         cudaPitchedPtr& D_projData,                         float outputScale)  { +	SProjectorParams3D p = params; +	p.fOutputScale *= outputScale;  	if (coneProjs) { -		return ConeBP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale); +		return ConeBP(D_volumeData, D_projData, dims, coneProjs, p);  	} else { -		return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale); +		return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, p);  	}  } diff --git a/cuda/3d/algo3d.h b/cuda/3d/algo3d.h index 886b092..ce331ad 100644 --- a/cuda/3d/algo3d.h +++ b/cuda/3d/algo3d.h @@ -39,8 +39,8 @@ public:  	ReconAlgo3D();  	~ReconAlgo3D(); -	bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs, float fOutputScale); -	bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs, float fOutputScale); +	bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs, const SProjectorParams3D& params); +	bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs, const SProjectorParams3D& params);  	void signalAbort() { shouldAbort = true; } @@ -55,6 +55,7 @@ protected:  	            float outputScale);  	SDimensions3D dims; +	SProjectorParams3D params;  	SConeProjection* coneProjs;  	SPar3DProjection* par3DProjs; diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 5670873..91f4530 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -67,34 +67,41 @@ template<typename ProjectionT>  static bool convertAstraGeometry_internal(const CVolumeGeometry3D* pVolGeom,                            unsigned int iProjectionAngleCount,                            ProjectionT*& pProjs, -                          float& fOutputScale) +                          SProjectorParams3D& params)  {  	assert(pVolGeom);  	assert(pProjs); +#if 0  	// TODO: Relative instead of absolute  	const float EPS = 0.00001f;  	if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthY()) > EPS)  		return false;  	if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthZ()) > EPS)  		return false; - +#endif  	// Translate  	float dx = -(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2;  	float dy = -(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2;  	float dz = -(pVolGeom->getWindowMinZ() + pVolGeom->getWindowMaxZ()) / 2; -	float factor = 1.0f / pVolGeom->getPixelLengthX(); +	float fx = 1.0f / pVolGeom->getPixelLengthX(); +	float fy = 1.0f / pVolGeom->getPixelLengthY(); +	float fz = 1.0f / pVolGeom->getPixelLengthZ();  	for (int i = 0; i < iProjectionAngleCount; ++i) {  		// CHECKME: Order of scaling and translation  		pProjs[i].translate(dx, dy, dz); -		pProjs[i].scale(factor); +		pProjs[i].scale(fx, fy, fz);  	} +	params.fVolScaleX = pVolGeom->getPixelLengthX(); +	params.fVolScaleY = pVolGeom->getPixelLengthY(); +	params.fVolScaleZ = pVolGeom->getPixelLengthZ(); +  	// CHECKME: Check factor -	fOutputScale *= pVolGeom->getPixelLengthX(); +	//params.fOutputScale *= pVolGeom->getPixelLengthX();  	return true;  } @@ -108,10 +115,8 @@ bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom,  	dims.iVolY = pVolGeom->getGridRowCount();  	dims.iVolZ = pVolGeom->getGridSliceCount();  	dims.iProjAngles = pProjGeom->getProjectionCount(); -	dims.iProjU = pProjGeom->getDetectorColCount(), -	dims.iProjV = pProjGeom->getDetectorRowCount(), -	dims.iRaysPerDetDim = 1; -	dims.iRaysPerVoxelDim = 1; +	dims.iProjU = pProjGeom->getDetectorColCount(); +	dims.iProjV = pProjGeom->getDetectorRowCount();  	if (dims.iVolX <= 0 || dims.iVolX <= 0 || dims.iVolX <= 0)  		return false; @@ -124,7 +129,7 @@ bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom,  bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,                            const CParallelProjectionGeometry3D* pProjGeom, -                          SPar3DProjection*& pProjs, float& fOutputScale) +                          SPar3DProjection*& pProjs, SProjectorParams3D& params)  {  	assert(pVolGeom);  	assert(pProjGeom); @@ -141,16 +146,14 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,  	bool ok; -	fOutputScale = 1.0f; - -	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); +	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params);  	return ok;  }  bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,                            const CParallelVecProjectionGeometry3D* pProjGeom, -                          SPar3DProjection*& pProjs, float& fOutputScale) +                          SPar3DProjection*& pProjs, SProjectorParams3D& params)  {  	assert(pVolGeom);  	assert(pProjGeom); @@ -164,16 +167,14 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,  	bool ok; -	fOutputScale = 1.0f; - -	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); +	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params);  	return ok;  }  bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,                            const CConeProjectionGeometry3D* pProjGeom, -                          SConeProjection*& pProjs, float& fOutputScale) +                          SConeProjection*& pProjs, SProjectorParams3D& params)  {  	assert(pVolGeom);  	assert(pProjGeom); @@ -192,16 +193,14 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,  	bool ok; -	fOutputScale = 1.0f; - -	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); +	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params);  	return ok;  }  bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,                            const CConeVecProjectionGeometry3D* pProjGeom, -                          SConeProjection*& pProjs, float& fOutputScale) +                          SConeProjection*& pProjs, SProjectorParams3D& params)  {  	assert(pVolGeom);  	assert(pProjGeom); @@ -215,9 +214,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,  	bool ok; -	fOutputScale = 1.0f; - -	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); +	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params);  	return ok;  } @@ -227,7 +224,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,                            const CProjectionGeometry3D* pProjGeom,                            SPar3DProjection*& pParProjs,                            SConeProjection*& pConeProjs, -                          float& fOutputScale) +                          SProjectorParams3D& params)  {  	const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom);  	const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom); @@ -240,13 +237,13 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,  	bool ok;  	if (conegeom) { -		ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, fOutputScale); +		ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, params);  	} else if (conevec3dgeom) { -		ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, fOutputScale); +		ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, params);  	} else if (par3dgeom) { -		ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, fOutputScale); +		ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, params);  	} else if (parvec3dgeom) { -		ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, fOutputScale); +		ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, params);  	} else {  		ok = false;  	} @@ -260,20 +257,17 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,  class AstraSIRT3d_internal {  public:  	SDimensions3D dims; +	SProjectorParams3D params;  	CUDAProjectionType3d projType;  	float* angles;  	float fOriginSourceDistance;  	float fOriginDetectorDistance; -	float fSourceZ; -	float fDetSize;  	float fRelaxation;  	SConeProjection* projs;  	SPar3DProjection* parprojs; -	float fOutputScale; -  	bool initialized;  	bool setStartReconstruction; @@ -305,12 +299,9 @@ AstraSIRT3d::AstraSIRT3d()  	pData->dims.iProjAngles = 0;  	pData->dims.iProjU = 0;  	pData->dims.iProjV = 0; -	pData->dims.iRaysPerDetDim = 1; -	pData->dims.iRaysPerVoxelDim = 1;  	pData->projs = 0;  	pData->parprojs = 0; -	pData->fOutputScale = 1.0f;  	pData->fRelaxation = 1.0f; @@ -361,7 +352,7 @@ bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom,  	ok = convertAstraGeometry(pVolGeom, pProjGeom,  	                          pData->parprojs, pData->projs, -	                          pData->fOutputScale); +	                          pData->params);  	if (!ok)  		return false; @@ -386,8 +377,8 @@ bool AstraSIRT3d::enableSuperSampling(unsigned int iVoxelSuperSampling,  	if (iVoxelSuperSampling == 0 || iDetectorSuperSampling == 0)  		return false; -	pData->dims.iRaysPerVoxelDim = iVoxelSuperSampling; -	pData->dims.iRaysPerDetDim = iDetectorSuperSampling; +	pData->params.iRaysPerVoxelDim = iVoxelSuperSampling; +	pData->params.iRaysPerDetDim = iDetectorSuperSampling;  	return true;  } @@ -447,9 +438,9 @@ bool AstraSIRT3d::init()  	bool ok;  	if (pData->projType == PROJ_PARALLEL) { -		ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale); +		ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs, pData->params);  	} else { -		ok = pData->sirt.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale); +		ok = pData->sirt.setConeGeometry(pData->dims, pData->projs, pData->params);  	}  	if (!ok) @@ -652,19 +643,16 @@ float AstraSIRT3d::computeDiffNorm()  class AstraCGLS3d_internal {  public:  	SDimensions3D dims; +	SProjectorParams3D params;  	CUDAProjectionType3d projType;  	float* angles;  	float fOriginSourceDistance;  	float fOriginDetectorDistance; -	float fSourceZ; -	float fDetSize;  	SConeProjection* projs;  	SPar3DProjection* parprojs; -	float fOutputScale; -  	bool initialized;  	bool setStartReconstruction; @@ -696,12 +684,9 @@ AstraCGLS3d::AstraCGLS3d()  	pData->dims.iProjAngles = 0;  	pData->dims.iProjU = 0;  	pData->dims.iProjV = 0; -	pData->dims.iRaysPerDetDim = 1; -	pData->dims.iRaysPerVoxelDim = 1;  	pData->projs = 0;  	pData->parprojs = 0; -	pData->fOutputScale = 1.0f;  	pData->initialized = false;  	pData->setStartReconstruction = false; @@ -750,7 +735,7 @@ bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom,  	ok = convertAstraGeometry(pVolGeom, pProjGeom,  	                          pData->parprojs, pData->projs, -	                          pData->fOutputScale); +	                          pData->params);  	if (!ok)  		return false; @@ -774,8 +759,8 @@ bool AstraCGLS3d::enableSuperSampling(unsigned int iVoxelSuperSampling,  	if (iVoxelSuperSampling == 0 || iDetectorSuperSampling == 0)  		return false; -	pData->dims.iRaysPerVoxelDim = iVoxelSuperSampling; -	pData->dims.iRaysPerDetDim = iDetectorSuperSampling; +	pData->params.iRaysPerVoxelDim = iVoxelSuperSampling; +	pData->params.iRaysPerDetDim = iDetectorSuperSampling;  	return true;  } @@ -829,9 +814,9 @@ bool AstraCGLS3d::init()  	bool ok;  	if (pData->projType == PROJ_PARALLEL) { -		ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale); +		ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs, pData->params);  	} else { -		ok = pData->cgls.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale); +		ok = pData->cgls.setConeGeometry(pData->dims, pData->projs, pData->params);  	}  	if (!ok) @@ -1039,23 +1024,23 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections,                   Cuda3DProjectionKernel projKernel)  {  	SDimensions3D dims; +	SProjectorParams3D params; + +	params.iRaysPerDetDim = iDetectorSuperSampling;  	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);  	if (!ok)  		return false; -	dims.iRaysPerDetDim = iDetectorSuperSampling;  	if (iDetectorSuperSampling == 0)  		return false;  	SPar3DProjection* pParProjs;  	SConeProjection* pConeProjs; -	float outputScale; -  	ok = convertAstraGeometry(pVolGeom, pProjGeom,  	                          pParProjs, pConeProjs, -	                          outputScale); +	                          params);  	if (iGPUIndex != -1) { @@ -1093,10 +1078,10 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections,  	if (pParProjs) {  		switch (projKernel) {  		case ker3d_default: -			ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, outputScale); +			ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, params);  			break;  		case ker3d_sum_square_weights: -			ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, outputScale*outputScale); +			ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, params);  			break;  		default:  			assert(false); @@ -1104,7 +1089,7 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections,  	} else {  		switch (projKernel) {  		case ker3d_default: -			ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, outputScale); +			ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, params);  			break;  		default:  			assert(false); @@ -1129,21 +1114,20 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections,                   int iGPUIndex, int iVoxelSuperSampling)  {  	SDimensions3D dims; +	SProjectorParams3D params; + +	params.iRaysPerVoxelDim = iVoxelSuperSampling;  	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);  	if (!ok)  		return false; -	dims.iRaysPerVoxelDim = iVoxelSuperSampling; -  	SPar3DProjection* pParProjs;  	SConeProjection* pConeProjs; -	float outputScale; -  	ok = convertAstraGeometry(pVolGeom, pProjGeom,  	                          pParProjs, pConeProjs, -	                          outputScale); +	                          params);  	if (iGPUIndex != -1) {  		cudaSetDevice(iGPUIndex); @@ -1179,9 +1163,9 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections,  	}  	if (pParProjs) -		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale); +		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, params);  	else -		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale); +		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, params);  	ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); @@ -1204,21 +1188,21 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,                        int iGPUIndex, int iVoxelSuperSampling)  {  	SDimensions3D dims; +	SProjectorParams3D params; + +	params.iRaysPerVoxelDim = iVoxelSuperSampling;  	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);  	if (!ok)  		return false; -	dims.iRaysPerVoxelDim = iVoxelSuperSampling;  	SPar3DProjection* pParProjs;  	SConeProjection* pConeProjs; -	float outputScale; -  	ok = convertAstraGeometry(pVolGeom, pProjGeom,  	                          pParProjs, pConeProjs, -	                          outputScale); +	                          params);  	if (iGPUIndex != -1) {  		cudaSetDevice(iGPUIndex); @@ -1255,9 +1239,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,  	processSino3D<opSet>(D_projData, 1.0f, dims);  	if (pParProjs) -		ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, outputScale); +		ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, params);  	else -		ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, outputScale); +		ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, params);  	processVol3D<opInvert>(D_pixelWeight, dims);  	if (!ok) { @@ -1272,9 +1256,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,  	ok &= zeroVolumeData(D_volumeData, dims);  	// Do BP into D_volumeData  	if (pParProjs) -		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale); +		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, params);  	else -		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale); +		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, params);  	// Multiply with weights  	processVol3D<opMul>(D_volumeData, D_pixelWeight, dims); @@ -1307,81 +1291,4 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, -bool astraCudaFDK(float* pfVolume, const float* pfProjections, -                  const CVolumeGeometry3D* pVolGeom, -                  const CConeProjectionGeometry3D* pProjGeom, -                  bool bShortScan, -                  int iGPUIndex, int iVoxelSuperSampling) -{ -	SDimensions3D dims; - -	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); - -	// TODO: Check that pVolGeom is normalized, since we don't support -	// other volume geometries yet - -	if (!ok) -		return false; - -	dims.iRaysPerVoxelDim = iVoxelSuperSampling; - -	if (iVoxelSuperSampling == 0) -		return false; - -	if (iGPUIndex != -1) { -		cudaSetDevice(iGPUIndex); -		cudaError_t err = cudaGetLastError(); - -		// Ignore errors caused by calling cudaSetDevice multiple times -		if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) -			return false; -	} - -	cudaPitchedPtr D_volumeData = allocateVolumeData(dims); -	ok = D_volumeData.ptr; -	if (!ok) -		return false; - -	cudaPitchedPtr D_projData = allocateProjectionData(dims); -	ok = D_projData.ptr; -	if (!ok) { -		cudaFree(D_volumeData.ptr); -		return false; -	} - -	ok &= copyProjectionsToDevice(pfProjections, D_projData, dims, dims.iProjU); - -	ok &= zeroVolumeData(D_volumeData, dims); - -	if (!ok) { -		cudaFree(D_volumeData.ptr); -		cudaFree(D_projData.ptr); -		return false; -	} - -	float fOriginSourceDistance = pProjGeom->getOriginSourceDistance(); -	float fOriginDetectorDistance = pProjGeom->getOriginDetectorDistance(); -	float fDetUSize = pProjGeom->getDetectorSpacingX(); -	float fDetVSize = pProjGeom->getDetectorSpacingY(); -	const float *pfAngles = pProjGeom->getProjectionAngles(); - - -	// TODO: Offer interface for SrcZ, DetZ -	ok &= FDK(D_volumeData, D_projData, fOriginSourceDistance, -	          fOriginDetectorDistance, 0, 0, fDetUSize, fDetVSize, -	          dims, pfAngles, bShortScan); - -	ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); - - -	cudaFree(D_volumeData.ptr); -	cudaFree(D_projData.ptr); - -	return ok; - -} - - - -  } diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h index 2137587..93bf576 100644 --- a/cuda/3d/astra3d.h +++ b/cuda/3d/astra3d.h @@ -37,11 +37,6 @@ namespace astra {  // TODO: Switch to a class hierarchy as with the 2D algorithms -enum Cuda3DProjectionKernel { -	ker3d_default = 0, -	ker3d_sum_square_weights -}; -  class CProjectionGeometry3D;  class CParallelProjectionGeometry3D;  class CParallelVecProjectionGeometry3D; @@ -50,6 +45,10 @@ class CConeVecProjectionGeometry3D;  class CVolumeGeometry3D;  class AstraSIRT3d_internal; +using astraCUDA3d::Cuda3DProjectionKernel; +using astraCUDA3d::ker3d_default; +using astraCUDA3d::ker3d_sum_square_weights; +  class _AstraExport AstraSIRT3d {  public: @@ -291,7 +290,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,                            const CProjectionGeometry3D* pProjGeom,                            SPar3DProjection*& pParProjs,                            SConeProjection*& pConeProjs, -                          float& fOutputScale); +                          astraCUDA3d::SProjectorParams3D& params);  _AstraExport bool astraCudaFP(const float* pfVolume, float* pfProjections,                        const CVolumeGeometry3D* pVolGeom, @@ -310,13 +309,6 @@ _AstraExport bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProje                        const CProjectionGeometry3D* pProjGeom,                        int iGPUIndex, int iVoxelSuperSampling); -_AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections, -                  const CVolumeGeometry3D* pVolGeom, -                  const CConeProjectionGeometry3D* pProjGeom, -                  bool bShortScan, -                  int iGPUIndex, int iVoxelSuperSampling); - -  } diff --git a/cuda/3d/cgls3d.cu b/cuda/3d/cgls3d.cu index dd0e8a0..0b0331e 100644 --- a/cuda/3d/cgls3d.cu +++ b/cuda/3d/cgls3d.cu @@ -242,7 +242,7 @@ bool doCGLS(cudaPitchedPtr& D_volumeData,  	CGLS cgls;  	bool ok = true; -	ok &= cgls.setConeGeometry(dims, angles, 1.0f); +	ok &= cgls.setConeGeometry(dims, angles, SProjectorParams3D());  	if (D_maskData.ptr)  		ok &= cgls.enableVolumeMask(); diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu index 4a41f6a..6bd9d16 100644 --- a/cuda/3d/cone_bp.cu +++ b/cuda/3d/cone_bp.cu @@ -77,6 +77,7 @@ bool bindProjDataTexture(const cudaArray* array)  //__launch_bounds__(32*16, 4) +template<bool FDKWEIGHT>  __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle,                              int angleOffset, const astraCUDA3d::SDimensions3D dims,                              float fOutputScale) @@ -134,7 +135,10 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng  				fU = fUNum * fr;  				fV = fVNum * fr;  				float fVal = tex3D(gT_coneProjTexture, fU, fAngle, fV); -				Z[idx] += fVal; // fr*fr*fVal; +				if (FDKWEIGHT) +					Z[idx] += fr*fr*fVal; +				else +					Z[idx] += fVal;  				fUNum += fCu.z;  				fVNum += fCv.z; @@ -154,7 +158,7 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng  // supersampling version -__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale) +__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, int iRaysPerVoxelDim, float fOutputScale)  {  	float* volData = (float*)D_volData; @@ -185,12 +189,12 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start  	if (endZ > dims.iVolZ)  		endZ = dims.iVolZ; -	float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; -	float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; -	float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; -	const float fSubStep = 1.0f/dims.iRaysPerVoxelDim; +	float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; +	float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; +	float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; +	const float fSubStep = 1.0f/iRaysPerVoxelDim; -	fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); +	fOutputScale /= (iRaysPerVoxelDim*iRaysPerVoxelDim*iRaysPerVoxelDim);  	for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f) @@ -216,11 +220,11 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start  			const float fCdc = gC_C[12*angle+11];  			float fXs = fX; -			for (int iSubX = 0; iSubX < dims.iRaysPerVoxelDim; ++iSubX) { +			for (int iSubX = 0; iSubX < iRaysPerVoxelDim; ++iSubX) {  			float fYs = fY; -			for (int iSubY = 0; iSubY < dims.iRaysPerVoxelDim; ++iSubY) { +			for (int iSubY = 0; iSubY < iRaysPerVoxelDim; ++iSubY) {  			float fZs = fZ; -			for (int iSubZ = 0; iSubZ < dims.iRaysPerVoxelDim; ++iSubZ) { +			for (int iSubZ = 0; iSubZ < iRaysPerVoxelDim; ++iSubZ) {  				const float fUNum = fCuc + fXs * fCux + fYs * fCuy + fZs * fCuz;  				const float fVNum = fCvc + fXs * fCvx + fYs * fCvy + fZs * fCvz; @@ -248,10 +252,12 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start  bool ConeBP_Array(cudaPitchedPtr D_volumeData,                    cudaArray *D_projArray,                    const SDimensions3D& dims, const SConeProjection* angles, -                  float fOutputScale) +                  const SProjectorParams3D& params)  {  	bindProjDataTexture(D_projArray); +	float fOutputScale = params.fOutputScale * params.fVolScaleX * params.fVolScaleY * params.fVolScaleZ; +  	for (unsigned int th = 0; th < dims.iProjAngles; th += g_MaxAngles) {  		unsigned int angleCount = g_MaxAngles;  		if (th + angleCount > dims.iProjAngles) @@ -295,10 +301,12 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,  		for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {  		// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);  -			if (dims.iRaysPerVoxelDim == 1) -				dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); +			if (params.bFDKWeighting) +				dev_cone_BP<true><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); +			else if (params.iRaysPerVoxelDim == 1) +				dev_cone_BP<false><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);  			else -				dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); +				dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale);  		}  		cudaTextForceKernelsCompletion(); @@ -315,14 +323,14 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,  bool ConeBP(cudaPitchedPtr D_volumeData,              cudaPitchedPtr D_projData,              const SDimensions3D& dims, const SConeProjection* angles, -            float fOutputScale) +            const SProjectorParams3D& params)  {  	// transfer projections to array  	cudaArray* cuArray = allocateProjectionArray(dims);  	transferProjectionsToArray(D_projData, cuArray, dims); -	bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale); +	bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, params);  	cudaFreeArray(cuArray); diff --git a/cuda/3d/cone_bp.h b/cuda/3d/cone_bp.h index 4d3d2dd..a6e2c23 100644 --- a/cuda/3d/cone_bp.h +++ b/cuda/3d/cone_bp.h @@ -34,12 +34,12 @@ namespace astraCUDA3d {  _AstraExport bool ConeBP_Array(cudaPitchedPtr D_volumeData,                    cudaArray *D_projArray,                    const SDimensions3D& dims, const SConeProjection* angles, -                  float fOutputScale); +                  const SProjectorParams3D& params);  _AstraExport bool ConeBP(cudaPitchedPtr D_volumeData,              cudaPitchedPtr D_projData,              const SDimensions3D& dims, const SConeProjection* angles, -            float fOutputScale); +            const SProjectorParams3D& params);  } diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu index 13b184f..fefcdc1 100644 --- a/cuda/3d/cone_fp.cu +++ b/cuda/3d/cone_fp.cu @@ -128,6 +128,18 @@ struct DIR_Z {  	__device__ float z(float f0, float f1, float f2) const { return f0; }  }; +struct SCALE_CUBE { +	float fOutputScale; +	__device__ float scale(float a1, float a2) const { return sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; } +}; + +struct SCALE_NONCUBE { +	float fScale1; +	float fScale2; +	float fOutputScale; +	__device__ float scale(float a1, float a2) const { return sqrt(a1*a1*fScale1+a2*a2*fScale2+1.0f) * fOutputScale; } +}; +  	// threadIdx: x = ??? detector  (u?)  	//            y = relative angle @@ -135,11 +147,12 @@ struct DIR_Z {  	// blockIdx:  x = ??? detector  (u+v?)      //            y = angle block -template<class COORD> +template<class COORD, class SCALE>  __global__ void cone_FP_t(float* D_projData, unsigned int projPitch,                            unsigned int startSlice,                            unsigned int startAngle, unsigned int endAngle, -                          const SDimensions3D dims, float fOutputScale) +                          const SDimensions3D dims, +                          SCALE sc)  {  	COORD c; @@ -188,7 +201,7 @@ __global__ void cone_FP_t(float* D_projData, unsigned int projPitch,  		const float b1 = c.c1(fSrcX,fSrcY,fSrcZ) - a1 * c.c0(fSrcX,fSrcY,fSrcZ);  		const float b2 = c.c2(fSrcX,fSrcY,fSrcZ) - a2 * c.c0(fSrcX,fSrcY,fSrcZ); -		const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; +		const float fDistCorr = sc.scale(a1, a2);  		float fVal = 0.0f; @@ -214,7 +227,8 @@ template<class COORD>  __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,                               unsigned int startSlice,                               unsigned int startAngle, unsigned int endAngle, -                             const SDimensions3D dims, float fOutputScale) +                             const SDimensions3D dims, int iRaysPerDetDim, +                             SCALE_NONCUBE sc)  {  	COORD c; @@ -245,7 +259,7 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,  	if (endSlice > c.nSlices(dims))  		endSlice = c.nSlices(dims); -	const float fSubStep = 1.0f/dims.iRaysPerDetDim; +	const float fSubStep = 1.0f/iRaysPerDetDim;  	for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV)  	{ @@ -255,9 +269,9 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,  		float fV = 0.0f;  		float fdU = detectorU - 0.5f + 0.5f*fSubStep; -		for (int iSubU = 0; iSubU < dims.iRaysPerDetDim; ++iSubU, fdU+=fSubStep) { +		for (int iSubU = 0; iSubU < iRaysPerDetDim; ++iSubU, fdU+=fSubStep) {  		float fdV = detectorV - 0.5f + 0.5f*fSubStep; -		for (int iSubV = 0; iSubV < dims.iRaysPerDetDim; ++iSubV, fdV+=fSubStep) { +		for (int iSubV = 0; iSubV < iRaysPerDetDim; ++iSubV, fdV+=fSubStep) {  		const float fDetX = fDetSX + fdU*fDetUX + fdV*fDetVX;  		const float fDetY = fDetSY + fdU*fDetUY + fdV*fDetVY; @@ -272,7 +286,7 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,  		const float b1 = c.c1(fSrcX,fSrcY,fSrcZ) - a1 * c.c0(fSrcX,fSrcY,fSrcZ);  		const float b2 = c.c2(fSrcX,fSrcY,fSrcZ) - a2 * c.c0(fSrcX,fSrcY,fSrcZ); -		const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; +		const float fDistCorr = sc.scale(a1, a2);  		float fVal = 0.0f; @@ -294,14 +308,14 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,  		}  		} -		D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (dims.iRaysPerDetDim * dims.iRaysPerDetDim); +		D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (iRaysPerDetDim * iRaysPerDetDim);  	}  }  bool ConeFP_Array_internal(cudaPitchedPtr D_projData,                    const SDimensions3D& dims, unsigned int angleCount, const SConeProjection* angles, -                  float fOutputScale) +                  const SProjectorParams3D& params)  {  	// transfer angles to constant memory  	float* tmp = new float[angleCount]; @@ -336,6 +350,36 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData,  	unsigned int blockEnd = 0;  	int blockDirection = 0; +	bool cube = true; +	if (abs(params.fVolScaleX / params.fVolScaleY - 1.0) > 0.00001) +		cube = false; +	if (abs(params.fVolScaleX / params.fVolScaleZ - 1.0) > 0.00001) +		cube = false; + +	SCALE_CUBE scube; +	scube.fOutputScale = params.fOutputScale * params.fVolScaleX; + +	SCALE_NONCUBE snoncubeX; +	float fS1 = params.fVolScaleY / params.fVolScaleX; +	snoncubeX.fScale1 = fS1 * fS1; +	float fS2 = params.fVolScaleZ / params.fVolScaleX; +	snoncubeX.fScale2 = fS2 * fS2; +	snoncubeX.fOutputScale = params.fOutputScale * params.fVolScaleX; + +	SCALE_NONCUBE snoncubeY; +	fS1 = params.fVolScaleX / params.fVolScaleY; +	snoncubeY.fScale1 = fS1 * fS1; +	fS2 = params.fVolScaleY / params.fVolScaleY; +	snoncubeY.fScale2 = fS2 * fS2; +	snoncubeY.fOutputScale = params.fOutputScale * params.fVolScaleY; + +	SCALE_NONCUBE snoncubeZ; +	fS1 = params.fVolScaleX / params.fVolScaleZ; +	snoncubeZ.fScale1 = fS1 * fS1; +	fS2 = params.fVolScaleY / params.fVolScaleZ; +	snoncubeZ.fScale2 = fS2 * fS2; +	snoncubeZ.fOutputScale = params.fOutputScale * params.fVolScaleZ; +  	// timeval t;  	// tic(t); @@ -373,22 +417,31 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData,  				if (blockDirection == 0) {  					for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) -						if (dims.iRaysPerDetDim == 1) -							cone_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +						if (params.iRaysPerDetDim == 1) +							if (cube) +								cone_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); +							else +								cone_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeX);  						else -							cone_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +							cone_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeX);  				} else if (blockDirection == 1) {  					for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) -						if (dims.iRaysPerDetDim == 1) -							cone_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +						if (params.iRaysPerDetDim == 1) +							if (cube) +								cone_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); +							else +								cone_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeY);  						else -							cone_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +							cone_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeY);  				} else if (blockDirection == 2) {  					for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) -						if (dims.iRaysPerDetDim == 1) -							cone_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +						if (params.iRaysPerDetDim == 1) +							if (cube) +								cone_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); +							else +								cone_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeZ);  						else -							cone_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +							cone_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeZ);  				}  			} @@ -414,7 +467,7 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData,  bool ConeFP(cudaPitchedPtr D_volumeData,              cudaPitchedPtr D_projData,              const SDimensions3D& dims, const SConeProjection* angles, -            float fOutputScale) +            const SProjectorParams3D& params)  {  	// transfer volume to array @@ -434,7 +487,7 @@ bool ConeFP(cudaPitchedPtr D_volumeData,  		ret = ConeFP_Array_internal(D_subprojData,  		                            dims, iEndAngle - iAngle, angles + iAngle, -		                            fOutputScale); +		                            params);  		if (!ret)  			break;  	} diff --git a/cuda/3d/cone_fp.h b/cuda/3d/cone_fp.h index 969a6d8..62c9caa 100644 --- a/cuda/3d/cone_fp.h +++ b/cuda/3d/cone_fp.h @@ -34,12 +34,12 @@ namespace astraCUDA3d {  _AstraExport bool ConeFP_Array(cudaArray *D_volArray,                    cudaPitchedPtr D_projData,                    const SDimensions3D& dims, const SConeProjection* angles, -                  float fOutputScale); +                  const SProjectorParams3D& params);  _AstraExport bool ConeFP(cudaPitchedPtr D_volumeData,              cudaPitchedPtr D_projData,              const SDimensions3D& dims, const SConeProjection* angles, -            float fOutputScale); +            const SProjectorParams3D& params);  } diff --git a/cuda/3d/dims3d.h b/cuda/3d/dims3d.h index 5437a85..eb7045f 100644 --- a/cuda/3d/dims3d.h +++ b/cuda/3d/dims3d.h @@ -37,6 +37,13 @@ namespace astraCUDA3d {  using astra::SConeProjection;  using astra::SPar3DProjection; + +enum Cuda3DProjectionKernel { +	ker3d_default = 0, +	ker3d_sum_square_weights +}; + +  struct SDimensions3D {  	unsigned int iVolX;  	unsigned int iVolY; @@ -44,8 +51,25 @@ struct SDimensions3D {  	unsigned int iProjAngles;  	unsigned int iProjU; // number of detectors in the U direction  	unsigned int iProjV; // number of detectors in the V direction +}; + +struct SProjectorParams3D { +	SProjectorParams3D() : +	    iRaysPerDetDim(1), iRaysPerVoxelDim(1), +	    fOutputScale(1.0f), +	    fVolScaleX(1.0f), fVolScaleY(1.0f), fVolScaleZ(1.0f), +	    ker(ker3d_default), +	    bFDKWeighting(false) +	{ } +  	unsigned int iRaysPerDetDim;  	unsigned int iRaysPerVoxelDim; +	float fOutputScale; +	float fVolScaleX; +	float fVolScaleY; +	float fVolScaleZ; +	Cuda3DProjectionKernel ker; +	bool bFDKWeighting;  };  } diff --git a/cuda/3d/fdk.cu b/cuda/3d/fdk.cu index 0e13be1..d847eee 100644 --- a/cuda/3d/fdk.cu +++ b/cuda/3d/fdk.cu @@ -41,8 +41,11 @@ $Id$  #include "dims3d.h"  #include "arith3d.h" +#include "cone_bp.h"  #include "../2d/fft.h" +#include "../../include/astra/Logging.h" +  typedef texture<float, 3, cudaReadModeElementType> texture3D;  static texture3D gT_coneProjTexture; @@ -86,129 +89,7 @@ static bool bindProjDataTexture(const cudaArray* array)  } -__global__ void devBP_FDK(void* D_volData, unsigned int volPitch, int startAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fInvDetUSize, float fInvDetVSize, const SDimensions3D dims) -{ -	float* volData = (float*)D_volData; - -	int endAngle = startAngle + g_anglesPerBlock; -	if (endAngle > dims.iProjAngles) -		endAngle = dims.iProjAngles; - -	// threadIdx: x = rel x -	//            y = rel y - -	// blockIdx:  x = x + y -    //            y = z - - -	// TO TRY: precompute part of detector intersection formulas in shared mem? -	// TO TRY: inner loop over z, gather ray values in shared mem - -	const int X = blockIdx.x % ((dims.iVolX+g_volBlockX-1)/g_volBlockX) * g_volBlockX + threadIdx.x; -	const int Y = blockIdx.x / ((dims.iVolX+g_volBlockX-1)/g_volBlockX) * g_volBlockY + threadIdx.y; - -	if (X > dims.iVolX) -		return; -	if (Y > dims.iVolY) -		return; - -	const int startZ = blockIdx.y * g_volBlockZ; -	int endZ = startZ + g_volBlockZ; -	if (endZ > dims.iVolZ) -		endZ = dims.iVolZ; - -	float fX = X - 0.5f*dims.iVolX + 0.5f; -	float fY = Y - 0.5f*dims.iVolY + 0.5f; -	float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - fSrcZ; - -	const float fU_base = 0.5f*dims.iProjU - 0.5f + 0.5f; -	const float fV_base = 0.5f*dims.iProjV - 0.5f + 0.5f + (fDetZ-fSrcZ); - -	// Note re. fZ/rV_base: the computations below are all relative to the -	// optical axis, so we do the Z-adjustments beforehand. - -	for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f) -	{ - -		float fVal = 0.0f; -		float fAngle = startAngle + 0.5f; - -		for (int angle = startAngle; angle < endAngle; ++angle, fAngle += 1.0f) -		{ - -			const float cos_theta = gC_angle_cos[angle]; -			const float sin_theta = gC_angle_sin[angle]; - -			const float fR = fSrcOrigin; -			const float fD = fR - fX * sin_theta + fY * cos_theta; -			float fWeight = fR / fD; -			fWeight *= fWeight; - -			const float fScaleFactor = (fR + fDetOrigin) / fD; -			const float fU = fU_base + (fX*cos_theta+fY*sin_theta) * fScaleFactor * fInvDetUSize; -			const float fV = fV_base + fZ * fScaleFactor * fInvDetVSize; - -			fVal += tex3D(gT_coneProjTexture, fU, fAngle, fV); - -		} - -		volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal; -//		projData[(angle*dims.iProjV+detectorV)*projPitch+detectorU] = 10.0f; -//		if (threadIdx.x == 0 && threadIdx.y == 0) { printf("%d,%d,%d [%d / %d] -> %f\n", angle, detectorU, detectorV, (angle*dims.iProjV+detectorV)*projPitch+detectorU, projPitch, projData[(angle*dims.iProjV+detectorV)*projPitch+detectorU]); } -	} - -} - - -bool FDK_BP(cudaPitchedPtr D_volumeData, -            cudaPitchedPtr D_projData, -            float fSrcOrigin, float fDetOrigin, -            float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, -            const SDimensions3D& dims, const float* angles) -{ -	// transfer projections to array - -	cudaArray* cuArray = allocateProjectionArray(dims); -	transferProjectionsToArray(D_projData, cuArray, dims); - -	bindProjDataTexture(cuArray); - -	float* angle_sin = new float[dims.iProjAngles]; -	float* angle_cos = new float[dims.iProjAngles]; - -	for (unsigned int i = 0; i < dims.iProjAngles; ++i) { -		angle_sin[i] = sinf(angles[i]); -		angle_cos[i] = cosf(angles[i]); -	} -	cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_sin, angle_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -	cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_cos, angle_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -	assert(e1 == cudaSuccess); -	assert(e2 == cudaSuccess); - -	delete[] angle_sin; -	delete[] angle_cos; - -	dim3 dimBlock(g_volBlockX, g_volBlockY); - -	dim3 dimGrid(((dims.iVolX+g_volBlockX-1)/g_volBlockX)*((dims.iVolY+g_volBlockY-1)/g_volBlockY), (dims.iVolZ+g_volBlockZ-1)/g_volBlockZ); - -	// timeval t; -	// tic(t); - -	for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { -		devBP_FDK<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, 1.0f / fDetUSize, 1.0f / fDetVSize, dims); -	} - -	cudaTextForceKernelsCompletion(); - -	cudaFreeArray(cuArray); - -	// printf("%f\n", toc(t)); - -	return true; -} - -__global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, const SDimensions3D dims) +__global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fZShift, float fDetUSize, float fDetVSize, const SDimensions3D dims)  {  	float* projData = (float*)D_projData;  	int angle = startAngle + blockIdx.y * g_anglesPerWeightBlock + threadIdx.y; @@ -230,21 +111,26 @@ __global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsig  	const float fT = fCentralRayLength * fCentralRayLength + fU * fU; -	float fV = (startDetectorV - 0.5f*dims.iProjV + 0.5f) * fDetVSize + fDetZ - fSrcZ; +	float fV = (startDetectorV - 0.5f*dims.iProjV + 0.5f) * fDetVSize + fZShift; + +	//const float fW = fCentralRayLength; +	//const float fW = fCentralRayLength * (M_PI / 2.0f) / (float)dims.iProjAngles; +	const float fW1 = fSrcOrigin * fDetUSize * fDetVSize; +	const float fW = fCentralRayLength * fW1 * fW1 * (M_PI / 2.0f) / (float)dims.iProjAngles;  	for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV)  	{  		const float fRayLength = sqrtf(fT + fV * fV); -		const float fWeight = fCentralRayLength / fRayLength; +		const float fWeight = fW / fRayLength;  		projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] *= fWeight; -		fV += 1.0f; +		fV += fDetVSize;  	}  } -__global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fDetUSize, float fCentralFanAngle, const SDimensions3D dims) +__global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fDetUSize, float fCentralFanAngle, const SDimensions3D dims)  {  	float* projData = (float*)D_projData;  	int angle = startAngle + blockIdx.y * g_anglesPerWeightBlock + threadIdx.y; @@ -305,7 +191,7 @@ __global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, un  // Perform the FDK pre-weighting and filtering  bool FDK_PreWeight(cudaPitchedPtr D_projData,                  float fSrcOrigin, float fDetOrigin, -                float fSrcZ, float fDetZ, +                float fZShift,                  float fDetUSize, float fDetVSize, bool bShortScan,                  const SDimensions3D& dims, const float* angles)  { @@ -318,23 +204,57 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData,  	int projPitch = D_projData.pitch/sizeof(float); -	devFDK_preweight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fDetVSize, dims); +	devFDK_preweight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fZShift, fDetUSize, fDetVSize, dims);  	cudaTextForceKernelsCompletion(); -	if (bShortScan) { +	if (bShortScan && dims.iProjAngles > 1) { +		ASTRA_DEBUG("Doing Parker weighting");  		// We do short-scan Parker weighting -		cudaError_t e1 = cudaMemcpyToSymbol(gC_angle, angles, +		// First, determine (in a very basic way) the interval that's +		// been scanned. We assume angles[0] is one of the endpoints of the +		// range. +		float fdA = angles[1] - angles[0]; + +		while (fdA < -M_PI) +			fdA += 2*M_PI; +		while (fdA >= M_PI) +			fdA -= 2*M_PI; + +		float fAngleBase; +		if (fdA >= 0.0f) { +			// going up from angles[0] +			fAngleBase = angles[dims.iProjAngles - 1]; +		} else { +			// going down from angles[0] +			fAngleBase = angles[0]; +		} + +		// We pick the highest end of the range, and then +		// move all angles so they fall in (-2pi,0] + +		float *fRelAngles = new float[dims.iProjAngles]; +		for (unsigned int i = 0; i < dims.iProjAngles; ++i) { +			float f = angles[i] - fAngleBase; +			while (f > 0) +				f -= 2*M_PI; +			while (f <= -2*M_PI) +				f += 2*M_PI; +			fRelAngles[i] = f; + +		} + +		cudaError_t e1 = cudaMemcpyToSymbol(gC_angle, fRelAngles,  		                                    dims.iProjAngles*sizeof(float), 0,  		                                    cudaMemcpyHostToDevice);  		assert(!e1); +		delete[] fRelAngles; -		// TODO: detector pixel size! -		float fCentralFanAngle = atanf((dims.iProjU*0.5f) / +		float fCentralFanAngle = atanf(fDetUSize * (dims.iProjU*0.5f) /  		                               (fSrcOrigin + fDetOrigin)); -		devFDK_ParkerWeight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fCentralFanAngle, dims); +		devFDK_ParkerWeight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fDetUSize, fCentralFanAngle, dims);  	} @@ -344,10 +264,7 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData,  bool FDK_Filter(cudaPitchedPtr D_projData,                  cufftComplex * D_filter, -                float fSrcOrigin, float fDetOrigin, -                float fSrcZ, float fDetZ, -                float fDetUSize, float fDetVSize, bool bShortScan, -                const SDimensions3D& dims, const float* angles) +                const SDimensions3D& dims)  {  	// The filtering is a regular ramp filter per detector line. @@ -392,9 +309,8 @@ bool FDK_Filter(cudaPitchedPtr D_projData,  bool FDK(cudaPitchedPtr D_volumeData,           cudaPitchedPtr D_projData, -         float fSrcOrigin, float fDetOrigin, -         float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, -         const SDimensions3D& dims, const float* angles, bool bShortScan) +         const SConeProjection* angles, +         const SDimensions3D& dims, SProjectorParams3D params, bool bShortScan)  {  	bool ok;  	// Generate filter @@ -403,12 +319,45 @@ bool FDK(cudaPitchedPtr D_volumeData,  	int iPaddedDetCount = calcNextPowerOfTwo(2 * dims.iProjU);  	int iHalfFFTSize = calcFFTFourSize(iPaddedDetCount); + +	// NB: We don't support arbitrary cone_vec geometries here. +	// Only those that are vertical sub-geometries +	// (cf. CompositeGeometryManager) of regular cone geometries. +	assert(dims.iProjAngles > 0); +	const SConeProjection& p0 = angles[0]; + +	// assuming U is in the XY plane, V is parallel to Z axis +	float fDetCX = p0.fDetSX + 0.5*dims.iProjU*p0.fDetUX; +	float fDetCY = p0.fDetSY + 0.5*dims.iProjU*p0.fDetUY; +	float fDetCZ = p0.fDetSZ + 0.5*dims.iProjV*p0.fDetVZ; + +	float fSrcOrigin = sqrt(p0.fSrcX*p0.fSrcX + p0.fSrcY*p0.fSrcY); +	float fDetOrigin = sqrt(fDetCX*fDetCX + fDetCY*fDetCY); +	float fDetUSize = sqrt(p0.fDetUX*p0.fDetUX + p0.fDetUY*p0.fDetUY); +	float fDetVSize = abs(p0.fDetVZ); + +	float fZShift = fDetCZ - p0.fSrcZ; + +	float *pfAngles = new float[dims.iProjAngles]; +	for (unsigned int i = 0; i < dims.iProjAngles; ++i) { +		// FIXME: Sign/order +		pfAngles[i] = -atan2(angles[i].fSrcX, angles[i].fSrcY) + M_PI; +	} + + +#if 1  	ok = FDK_PreWeight(D_projData, fSrcOrigin, fDetOrigin, -	                fSrcZ, fDetZ, fDetUSize, fDetVSize, -	                bShortScan, dims, angles); +	                fZShift, fDetUSize, fDetVSize, +	                bShortScan, dims, pfAngles); +#else +	ok = true; +#endif +	delete[] pfAngles; +  	if (!ok)  		return false; +#if 1  	cufftComplex *pHostFilter = new cufftComplex[dims.iProjAngles * iHalfFFTSize];  	memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iHalfFFTSize); @@ -425,28 +374,25 @@ bool FDK(cudaPitchedPtr D_volumeData, -	ok = FDK_Filter(D_projData, D_filter, fSrcOrigin, fDetOrigin, -	                fSrcZ, fDetZ, fDetUSize, fDetVSize, -	                bShortScan, dims, angles); +	ok = FDK_Filter(D_projData, D_filter, dims);  	// Clean up filter  	freeComplexOnDevice(D_filter); - +#endif  	if (!ok)  		return false;  	// Perform BP -	ok = FDK_BP(D_volumeData, D_projData, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, -	            fDetUSize, fDetVSize, dims, angles); +	params.bFDKWeighting = true; + +	//ok = FDK_BP(D_volumeData, D_projData, fSrcOrigin, fDetOrigin, 0.0f, 0.0f, fDetUSize, fDetVSize, dims, pfAngles); +	ok = ConeBP(D_volumeData, D_projData, dims, angles, params);  	if (!ok)  		return false; -	processVol3D<opMul>(D_volumeData, -	                  (M_PI / 2.0f) / (float)dims.iProjAngles, dims); -  	return true;  } diff --git a/cuda/3d/fdk.h b/cuda/3d/fdk.h index de7fe53..8d2a128 100644 --- a/cuda/3d/fdk.h +++ b/cuda/3d/fdk.h @@ -35,16 +35,14 @@ namespace astraCUDA3d {  bool FDK_PreWeight(cudaPitchedPtr D_projData,                  float fSrcOrigin, float fDetOrigin, -                float fSrcZ, float fDetZ, +                float fZShift,                  float fDetUSize, float fDetVSize, bool bShortScan,                  const SDimensions3D& dims, const float* angles);  bool FDK(cudaPitchedPtr D_volumeData,           cudaPitchedPtr D_projData, -         float fSrcOrigin, float fDetOrigin, -         float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, -         const SDimensions3D& dims, const float* angles, bool bShortScan); - +         const SConeProjection* angles, +         const SDimensions3D& dims, SProjectorParams3D params, bool bShortScan);  } diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu index 0320117..4fb30a2 100644 --- a/cuda/3d/mem3d.cu +++ b/cuda/3d/mem3d.cu @@ -38,6 +38,7 @@ $Id$  #include "cone_bp.h"  #include "par3d_fp.h"  #include "par3d_bp.h" +#include "fdk.h"  #include "astra/Logging.h" @@ -193,17 +194,17 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos)  bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel)  {  	SDimensions3D dims; +	SProjectorParams3D params;  	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);  	if (!ok)  		return false;  #if 1 -	dims.iRaysPerDetDim = iDetectorSuperSampling; +	params.iRaysPerDetDim = iDetectorSuperSampling;  	if (iDetectorSuperSampling == 0)  		return false;  #else -	dims.iRaysPerDetDim = 1;  	astra::Cuda3DProjectionKernel projKernel = astra::ker3d_default;  #endif @@ -211,11 +212,9 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con  	SPar3DProjection* pParProjs;  	SConeProjection* pConeProjs; -	float outputScale = 1.0f; -  	ok = convertAstraGeometry(pVolGeom, pProjGeom,  	                          pParProjs, pConeProjs, -	                          outputScale); +	                          params);  	if (pParProjs) {  #if 0 @@ -230,10 +229,10 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con  		switch (projKernel) {  		case astra::ker3d_default: -			ok &= Par3DFP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale); +			ok &= Par3DFP(volData.d->ptr, projData.d->ptr, dims, pParProjs, params);  			break;  		case astra::ker3d_sum_square_weights: -			ok &= Par3DFP_SumSqW(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale*outputScale); +			ok &= Par3DFP_SumSqW(volData.d->ptr, projData.d->ptr, dims, pParProjs, params);  			break;  		default:  			ok = false; @@ -241,7 +240,7 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con  	} else {  		switch (projKernel) {  		case astra::ker3d_default: -			ok &= ConeFP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale); +			ok &= ConeFP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, params);  			break;  		default:  			ok = false; @@ -254,35 +253,59 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con  bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling)  {  	SDimensions3D dims; +	SProjectorParams3D params;  	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);  	if (!ok)  		return false;  #if 1 -	dims.iRaysPerVoxelDim = iVoxelSuperSampling; -#else -	dims.iRaysPerVoxelDim = 1; +	params.iRaysPerVoxelDim = iVoxelSuperSampling;  #endif  	SPar3DProjection* pParProjs;  	SConeProjection* pConeProjs; -	float outputScale = 1.0f; -  	ok = convertAstraGeometry(pVolGeom, pProjGeom,  	                          pParProjs, pConeProjs, -	                          outputScale); +	                          params);  	if (pParProjs) -		ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale); +		ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, params);  	else -		ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale); +		ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, params);  	return ok;  } +bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan) +{ +	SDimensions3D dims; +	SProjectorParams3D params; + +	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); +	if (!ok) +		return false; + +	SPar3DProjection* pParProjs; +	SConeProjection* pConeProjs; + +	ok = convertAstraGeometry(pVolGeom, pProjGeom, +	                          pParProjs, pConeProjs, +	                          params); + +	if (!ok || !pConeProjs) +		return false; + +	ok &= FDK(volData.d->ptr, projData.d->ptr, pConeProjs, dims, params, bShortScan); + +	return ok; + + + +} + diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h index 6fff80b..2d0fb27 100644 --- a/cuda/3d/mem3d.h +++ b/cuda/3d/mem3d.h @@ -95,6 +95,7 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con  bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling); +bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan);  } diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu index cafab46..491ee2f 100644 --- a/cuda/3d/par3d_bp.cu +++ b/cuda/3d/par3d_bp.cu @@ -143,7 +143,7 @@ __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAn  }  // supersampling version -__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale) +__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, int iRaysPerVoxelDim, float fOutputScale)  {  	float* volData = (float*)D_volData; @@ -174,13 +174,13 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star  	if (endZ > dims.iVolZ)  		endZ = dims.iVolZ; -	float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; -	float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; -	float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; +	float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; +	float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; +	float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; -	const float fSubStep = 1.0f/dims.iRaysPerVoxelDim; +	const float fSubStep = 1.0f/iRaysPerVoxelDim; -	fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); +	fOutputScale /= (iRaysPerVoxelDim*iRaysPerVoxelDim*iRaysPerVoxelDim);  	for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f) @@ -201,11 +201,11 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star  			const float fCvc = gC_C[8*angle+7];  			float fXs = fX; -			for (int iSubX = 0; iSubX < dims.iRaysPerVoxelDim; ++iSubX) { +			for (int iSubX = 0; iSubX < iRaysPerVoxelDim; ++iSubX) {  			float fYs = fY; -			for (int iSubY = 0; iSubY < dims.iRaysPerVoxelDim; ++iSubY) { +			for (int iSubY = 0; iSubY < iRaysPerVoxelDim; ++iSubY) {  			float fZs = fZ; -			for (int iSubZ = 0; iSubZ < dims.iRaysPerVoxelDim; ++iSubZ) { +			for (int iSubZ = 0; iSubZ < iRaysPerVoxelDim; ++iSubZ) {  				const float fU = fCuc + fXs * fCux + fYs * fCuy + fZs * fCuz;  				const float fV = fCvc + fXs * fCvx + fYs * fCvy + fZs * fCvz; @@ -228,10 +228,12 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star  bool Par3DBP_Array(cudaPitchedPtr D_volumeData,                     cudaArray *D_projArray,                     const SDimensions3D& dims, const SPar3DProjection* angles, -                   float fOutputScale) +                   const SProjectorParams3D& params)  {  	bindProjDataTexture(D_projArray); +	float fOutputScale = params.fOutputScale * params.fVolScaleX * params.fVolScaleY * params.fVolScaleZ; +  	for (unsigned int th = 0; th < dims.iProjAngles; th += g_MaxAngles) {  		unsigned int angleCount = g_MaxAngles;  		if (th + angleCount > dims.iProjAngles) @@ -274,10 +276,10 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData,  		for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {  			// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);  -			if (dims.iRaysPerVoxelDim == 1) +			if (params.iRaysPerVoxelDim == 1)  				dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);  			else -				dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); +				dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale);  		}  		cudaTextForceKernelsCompletion(); @@ -293,14 +295,14 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData,  bool Par3DBP(cudaPitchedPtr D_volumeData,              cudaPitchedPtr D_projData,              const SDimensions3D& dims, const SPar3DProjection* angles, -            float fOutputScale) +            const SProjectorParams3D& params)  {  	// transfer projections to array  	cudaArray* cuArray = allocateProjectionArray(dims);  	transferProjectionsToArray(D_projData, cuArray, dims); -	bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale); +	bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles, params);  	cudaFreeArray(cuArray); diff --git a/cuda/3d/par3d_bp.h b/cuda/3d/par3d_bp.h index f1fc62d..3efad19 100644 --- a/cuda/3d/par3d_bp.h +++ b/cuda/3d/par3d_bp.h @@ -34,12 +34,12 @@ namespace astraCUDA3d {  _AstraExport bool Par3DBP_Array(cudaPitchedPtr D_volumeData,                     cudaArray *D_projArray,                     const SDimensions3D& dims, const SPar3DProjection* angles, -                   float fOutputScale); +                   const SProjectorParams3D& params);  _AstraExport bool Par3DBP(cudaPitchedPtr D_volumeData,               cudaPitchedPtr D_projData,               const SDimensions3D& dims, const SPar3DProjection* angles, -             float fOutputScale); +             const SProjectorParams3D& params);  } diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index 3ce3d42..6f9ce40 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -128,6 +128,18 @@ struct DIR_Z {  	__device__ float z(float f0, float f1, float f2) const { return f0; }  }; +struct SCALE_CUBE { +	float fOutputScale; +	__device__ float scale(float a1, float a2) const { return sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; } +}; + +struct SCALE_NONCUBE { +	float fScale1; +	float fScale2; +	float fOutputScale; +	__device__ float scale(float a1, float a2) const { return sqrt(a1*a1*fScale1+a2*a2*fScale2+1.0f) * fOutputScale; } +}; +  // threadIdx: x = u detector @@ -136,11 +148,12 @@ struct DIR_Z {  //            y = angle block -template<class COORD> +template<class COORD, class SCALE>  __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch,                             unsigned int startSlice,                             unsigned int startAngle, unsigned int endAngle, -                           const SDimensions3D dims, float fOutputScale) +                           const SDimensions3D dims, +                           SCALE sc)  {  	COORD c; @@ -161,6 +174,9 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch,  	const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY;  	const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; +	const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); +	const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); +	const float fDistCorr = sc.scale(a1, a2);  	const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; @@ -186,13 +202,9 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch,  		/* ray:   (y) = (ay) * x + (by)    */  		/*        (z)   (az)       (bz)    */ -		const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); -		const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);  		const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ);  		const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); -		const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; -  		float fVal = 0.0f;  		float f0 = startSlice + 0.5f; @@ -218,7 +230,8 @@ template<class COORD>  __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,                                unsigned int startSlice,                                unsigned int startAngle, unsigned int endAngle, -                              const SDimensions3D dims, float fOutputScale) +                              const SDimensions3D dims, int iRaysPerDetDim, +                              SCALE_NONCUBE sc)  {  	COORD c; @@ -239,7 +252,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,  	const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY;  	const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; - +	const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); +	const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); +	const float fDistCorr = sc.scale(a1, a2);  	const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;  	const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; @@ -251,7 +266,7 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,  	if (endSlice > c.nSlices(dims))  		endSlice = c.nSlices(dims); -	const float fSubStep = 1.0f/dims.iRaysPerDetDim; +	const float fSubStep = 1.0f/iRaysPerDetDim;  	for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV)  	{ @@ -259,9 +274,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,  		float fV = 0.0f;  		float fdU = detectorU - 0.5f + 0.5f*fSubStep; -		for (int iSubU = 0; iSubU < dims.iRaysPerDetDim; ++iSubU, fdU+=fSubStep) { +		for (int iSubU = 0; iSubU < iRaysPerDetDim; ++iSubU, fdU+=fSubStep) {  		float fdV = detectorV - 0.5f + 0.5f*fSubStep; -		for (int iSubV = 0; iSubV < dims.iRaysPerDetDim; ++iSubV, fdV+=fSubStep) { +		for (int iSubV = 0; iSubV < iRaysPerDetDim; ++iSubV, fdV+=fSubStep) {  		/* Trace ray in direction Ray to (detectorU,detectorV) from  */  		/* X = startSlice to X = endSlice                            */ @@ -274,12 +289,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,  		/* ray:   (y) = (ay) * x + (by)    */  		/*        (z)   (az)       (bz)    */ -		const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); -		const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);  		const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ);  		const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); -		const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale;  		float fVal = 0.0f; @@ -295,13 +307,13 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,  			f2 += a2;  		} -		fVal *= fDistCorr;  		fV += fVal;  		}  		} -		D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (dims.iRaysPerDetDim * dims.iRaysPerDetDim); +		fV *= fDistCorr; +		D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (iRaysPerDetDim * iRaysPerDetDim);  	}  } @@ -324,7 +336,8 @@ template<class COORD>  __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch,                                    unsigned int startSlice,                                    unsigned int startAngle, unsigned int endAngle, -                                  const SDimensions3D dims, float fOutputScale) +                                  const SDimensions3D dims, +                                  SCALE_NONCUBE sc)  {  	COORD c; @@ -345,6 +358,9 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch,  	const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY;  	const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; +	const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); +	const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); +	const float fDistCorr = sc.scale(a1, a2);  	const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; @@ -370,13 +386,9 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch,  		/* ray:   (y) = (ay) * x + (by)    */  		/*        (z)   (az)       (bz)    */ -		const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); -		const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);  		const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ);  		const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); -		const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; -  		float fVal = 0.0f;  		float f0 = startSlice + 0.5f; @@ -385,12 +397,13 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch,  		for (int s = startSlice; s < endSlice; ++s)  		{ -			fVal += dirWeights(f1, c.nDim1(dims)) * dirWeights(f2, c.nDim2(dims)) * fDistCorr * fDistCorr; +			fVal += dirWeights(f1, c.nDim1(dims)) * dirWeights(f2, c.nDim2(dims));  			f0 += 1.0f;  			f1 += a1;  			f2 += a2;  		} +		fVal *= fDistCorr * fDistCorr;  		D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fVal;  	}  } @@ -401,7 +414,7 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch,  bool Par3DFP_Array_internal(cudaPitchedPtr D_projData,                     const SDimensions3D& dims, unsigned int angleCount, const SPar3DProjection* angles, -                   float fOutputScale) +                   const SProjectorParams3D& params)  {  	// transfer angles to constant memory  	float* tmp = new float[dims.iProjAngles]; @@ -436,6 +449,36 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData,  	unsigned int blockEnd = 0;  	int blockDirection = 0; +	bool cube = true; +	if (abs(params.fVolScaleX / params.fVolScaleY - 1.0) > 0.00001) +		cube = false; +	if (abs(params.fVolScaleX / params.fVolScaleZ - 1.0) > 0.00001) +		cube = false; + +	SCALE_CUBE scube; +	scube.fOutputScale = params.fOutputScale * params.fVolScaleX; + +	SCALE_NONCUBE snoncubeX; +	float fS1 = params.fVolScaleY / params.fVolScaleX; +	snoncubeX.fScale1 = fS1 * fS1; +	float fS2 = params.fVolScaleZ / params.fVolScaleX; +	snoncubeX.fScale2 = fS2 * fS2; +	snoncubeX.fOutputScale = params.fOutputScale * params.fVolScaleX; + +	SCALE_NONCUBE snoncubeY; +	fS1 = params.fVolScaleX / params.fVolScaleY; +	snoncubeY.fScale1 = fS1 * fS1; +	fS2 = params.fVolScaleY / params.fVolScaleY; +	snoncubeY.fScale2 = fS2 * fS2; +	snoncubeY.fOutputScale = params.fOutputScale * params.fVolScaleY; + +	SCALE_NONCUBE snoncubeZ; +	fS1 = params.fVolScaleX / params.fVolScaleZ; +	snoncubeZ.fScale1 = fS1 * fS1; +	fS2 = params.fVolScaleY / params.fVolScaleZ; +	snoncubeZ.fScale2 = fS2 * fS2; +	snoncubeZ.fOutputScale = params.fOutputScale * params.fVolScaleZ; +  	// timeval t;  	// tic(t); @@ -473,22 +516,31 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData,  				if (blockDirection == 0) {  					for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) -						if (dims.iRaysPerDetDim == 1) -							par3D_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +						if (params.iRaysPerDetDim == 1) +								if (cube) +										par3D_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); +								else +										par3D_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeX);  						else -							par3D_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +							par3D_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeX);  				} else if (blockDirection == 1) {  					for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) -						if (dims.iRaysPerDetDim == 1) -							par3D_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +						if (params.iRaysPerDetDim == 1) +								if (cube) +										par3D_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); +								else +										par3D_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeY);  						else -							par3D_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +							par3D_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeY);  				} else if (blockDirection == 2) {  					for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) -						if (dims.iRaysPerDetDim == 1) -							par3D_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +						if (params.iRaysPerDetDim == 1) +								if (cube) +										par3D_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); +								else +										par3D_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeZ);  						else -							par3D_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +							par3D_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeZ);  				}  			} @@ -514,7 +566,7 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData,  bool Par3DFP(cudaPitchedPtr D_volumeData,               cudaPitchedPtr D_projData,               const SDimensions3D& dims, const SPar3DProjection* angles, -             float fOutputScale) +             const SProjectorParams3D& params)  {  	// transfer volume to array  	cudaArray* cuArray = allocateVolumeArray(dims); @@ -533,7 +585,7 @@ bool Par3DFP(cudaPitchedPtr D_volumeData,  		ret = Par3DFP_Array_internal(D_subprojData,  		                             dims, iEndAngle - iAngle, angles + iAngle, -		                             fOutputScale); +		                             params);  		if (!ret)  			break;  	} @@ -548,7 +600,7 @@ bool Par3DFP(cudaPitchedPtr D_volumeData,  bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,                      cudaPitchedPtr D_projData,                      const SDimensions3D& dims, const SPar3DProjection* angles, -                    float fOutputScale) +                    const SProjectorParams3D& params)  {  	// transfer angles to constant memory  	float* tmp = new float[dims.iProjAngles]; @@ -583,6 +635,28 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,  	unsigned int blockEnd = 0;  	int blockDirection = 0; +	SCALE_NONCUBE snoncubeX; +	float fS1 = params.fVolScaleY / params.fVolScaleX; +	snoncubeX.fScale1 = fS1 * fS1; +	float fS2 = params.fVolScaleZ / params.fVolScaleX; +	snoncubeX.fScale2 = fS2 * fS2; +	snoncubeX.fOutputScale = params.fOutputScale * params.fVolScaleX; + +	SCALE_NONCUBE snoncubeY; +	fS1 = params.fVolScaleX / params.fVolScaleY; +	snoncubeY.fScale1 = fS1 * fS1; +	fS2 = params.fVolScaleY / params.fVolScaleY; +	snoncubeY.fScale2 = fS2 * fS2; +	snoncubeY.fOutputScale = params.fOutputScale * params.fVolScaleY; + +	SCALE_NONCUBE snoncubeZ; +	fS1 = params.fVolScaleX / params.fVolScaleZ; +	snoncubeZ.fScale1 = fS1 * fS1; +	fS2 = params.fVolScaleY / params.fVolScaleZ; +	snoncubeZ.fScale2 = fS2 * fS2; +	snoncubeZ.fOutputScale = params.fOutputScale * params.fVolScaleZ; + +  	// timeval t;  	// tic(t); @@ -620,8 +694,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,  				if (blockDirection == 0) {  					for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) -						if (dims.iRaysPerDetDim == 1) -							par3D_FP_SumSqW_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +						if (params.iRaysPerDetDim == 1) +							par3D_FP_SumSqW_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeX);  						else  #if 0  							par3D_FP_SS_SumSqW_dirX<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); @@ -630,8 +704,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,  #endif  				} else if (blockDirection == 1) {  					for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) -						if (dims.iRaysPerDetDim == 1) -							par3D_FP_SumSqW_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +						if (params.iRaysPerDetDim == 1) +							par3D_FP_SumSqW_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeY);  						else  #if 0  							par3D_FP_SS_SumSqW_dirY<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); @@ -640,8 +714,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,  #endif  				} else if (blockDirection == 2) {  					for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) -						if (dims.iRaysPerDetDim == 1) -							par3D_FP_SumSqW_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +						if (params.iRaysPerDetDim == 1) +							par3D_FP_SumSqW_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeZ);  						else  #if 0  							par3D_FP_SS_SumSqW_dirZ<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); diff --git a/cuda/3d/par3d_fp.h b/cuda/3d/par3d_fp.h index 41f204f..5f65cd9 100644 --- a/cuda/3d/par3d_fp.h +++ b/cuda/3d/par3d_fp.h @@ -34,17 +34,17 @@ namespace astraCUDA3d {  _AstraExport bool Par3DFP_Array(cudaArray *D_volArray,                     cudaPitchedPtr D_projData,                     const SDimensions3D& dims, const SPar3DProjection* angles, -                   float fOutputScale); +                   const SProjectorParams3D& params);  _AstraExport bool Par3DFP(cudaPitchedPtr D_volumeData,              cudaPitchedPtr D_projData,              const SDimensions3D& dims, const SPar3DProjection* angles, -            float fOutputScale); +            const SProjectorParams3D& params);  _AstraExport bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,                      cudaPitchedPtr D_projData,                      const SDimensions3D& dims, const SPar3DProjection* angles, -                    float fOutputScale); +                    const SProjectorParams3D& params);  } diff --git a/cuda/3d/sirt3d.cu b/cuda/3d/sirt3d.cu index 713944b..ba6a159 100644 --- a/cuda/3d/sirt3d.cu +++ b/cuda/3d/sirt3d.cu @@ -353,7 +353,7 @@ bool doSIRT(cudaPitchedPtr& D_volumeData,  	SIRT sirt;  	bool ok = true; -	ok &= sirt.setConeGeometry(dims, angles, 1.0f); +	ok &= sirt.setConeGeometry(dims, angles, SProjectorParams3D());  	if (D_maskData.ptr)  		ok &= sirt.enableVolumeMask(); diff --git a/include/astra/CompositeGeometryManager.h b/include/astra/CompositeGeometryManager.h index 18dd72f..064370a 100644 --- a/include/astra/CompositeGeometryManager.h +++ b/include/astra/CompositeGeometryManager.h @@ -55,6 +55,10 @@ struct SGPUParams {  	size_t memory;  }; +struct SFDKSettings { +	bool bShortScan; +}; +  class _AstraExport CCompositeGeometryManager {  public: @@ -127,9 +131,10 @@ public:  		CProjector3D *pProjector; // For a `global' geometry. It will not match  		                          // the geometries of the input and output. +		SFDKSettings FDKSettings;  		enum { -			JOB_FP, JOB_BP, JOB_NOP +			JOB_FP, JOB_BP, JOB_FDK, JOB_NOP  		} eType;  		enum {  			MODE_ADD, MODE_SET @@ -155,6 +160,8 @@ public:  	          CFloat32ProjectionData3DMemory *pProjData);  	bool doBP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,  	          CFloat32ProjectionData3DMemory *pProjData); +	bool doFDK(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, +	          CFloat32ProjectionData3DMemory *pProjData, bool bShortScan);  	bool doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData);  	bool doBP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData); diff --git a/include/astra/GeometryUtil3D.h b/include/astra/GeometryUtil3D.h index e4d73e4..e051240 100644 --- a/include/astra/GeometryUtil3D.h +++ b/include/astra/GeometryUtil3D.h @@ -56,19 +56,19 @@ struct SConeProjection {  		fDetSZ += dz;  	} -	void scale(double factor) { -		fSrcX *= factor; -		fSrcY *= factor; -		fSrcZ *= factor; -		fDetSX *= factor; -		fDetSY *= factor; -		fDetSZ *= factor; -		fDetUX *= factor; -		fDetUY *= factor; -		fDetUZ *= factor; -		fDetVX *= factor; -		fDetVY *= factor; -		fDetVZ *= factor; +	void scale(double fx, double fy, double fz) { +		fSrcX *= fx; +		fSrcY *= fy; +		fSrcZ *= fz; +		fDetSX *= fx; +		fDetSY *= fy; +		fDetSZ *= fz; +		fDetUX *= fx; +		fDetUY *= fy; +		fDetUZ *= fz; +		fDetVX *= fx; +		fDetVY *= fy; +		fDetVZ *= fz;  	}  }; @@ -93,19 +93,19 @@ struct SPar3DProjection {  		fDetSY += dy;  		fDetSZ += dz;  	} -	void scale(double factor) { -		fRayX *= factor; -		fRayY *= factor; -		fRayZ *= factor; -		fDetSX *= factor; -		fDetSY *= factor; -		fDetSZ *= factor; -		fDetUX *= factor; -		fDetUY *= factor; -		fDetUZ *= factor; -		fDetVX *= factor; -		fDetVY *= factor; -		fDetVZ *= factor; +	void scale(double fx, double fy, double fz) { +		fRayX *= fx; +		fRayY *= fy; +		fRayZ *= fz; +		fDetSX *= fx; +		fDetSY *= fy; +		fDetSZ *= fz; +		fDetUX *= fx; +		fDetUY *= fy; +		fDetUZ *= fz; +		fDetVX *= fx; +		fDetVY *= fy; +		fDetVZ *= fz;  	}  }; diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp index 084ba8c..c5b4d3b 100644 --- a/src/CompositeGeometryManager.cpp +++ b/src/CompositeGeometryManager.cpp @@ -146,6 +146,7 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div  				newjob.eType = j->eType;  				newjob.eMode = j->eMode;  				newjob.pProjector = j->pProjector; +				newjob.FDKSettings = j->FDKSettings;  				CPart* input = job.pInput->reduce(outputPart.get()); @@ -992,6 +993,25 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeDat  	return doJobs(L);  } + +bool CCompositeGeometryManager::doFDK(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, +                                     CFloat32ProjectionData3DMemory *pProjData, bool bShortScan) +{ +	if (!dynamic_cast<CConeProjectionGeometry3D*>(pProjData->getGeometry())) { +		ASTRA_ERROR("CCompositeGeometryManager::doFDK: cone geometry required"); +		return false; +	} + +	SJob job = createJobBP(pProjector, pVolData, pProjData); +	job.eType = SJob::JOB_FDK; +	job.FDKSettings.bShortScan = bShortScan; + +	TJobList L; +	L.push_back(job); + +	return doJobs(L); +} +  bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData)  {  	ASTRA_DEBUG("CCompositeGeometryManager::doFP, multi-volume"); @@ -1185,7 +1205,9 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter  		ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims);  		if (!ok) ASTRA_ERROR("Error copying input data to GPU"); -		if (j.eType == CCompositeGeometryManager::SJob::JOB_FP) { +		switch (j.eType) { +		case CCompositeGeometryManager::SJob::JOB_FP: +		{  			assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pInput.get()));  			assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pOutput.get())); @@ -1194,7 +1216,10 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter  			ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel);  			if (!ok) ASTRA_ERROR("Error performing sub-FP");  			ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done"); -		} else if (j.eType == CCompositeGeometryManager::SJob::JOB_BP) { +		} +		break; +		case CCompositeGeometryManager::SJob::JOB_BP: +		{  			assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get()));  			assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get())); @@ -1203,7 +1228,26 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter  			ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling);  			if (!ok) ASTRA_ERROR("Error performing sub-BP");  			ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done"); -		} else { +		} +		break; +		case CCompositeGeometryManager::SJob::JOB_FDK: +		{ +			assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get())); +			assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get())); + +			if (srcdims.subx || srcdims.suby) { +				ASTRA_ERROR("CCompositeGeometryManager::doJobs: data too large for FDK"); +				ok = false; +			} else { +				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FDK"); + +				ok = astraCUDA3d::FDK(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, j.FDKSettings.bShortScan); +				if (!ok) ASTRA_ERROR("Error performing sub-FDK"); +				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FDK done"); +			} +		} +		break; +		default:  			assert(false);  		} diff --git a/src/CudaFDKAlgorithm3D.cpp b/src/CudaFDKAlgorithm3D.cpp index b5ce545..c7c8ed5 100644 --- a/src/CudaFDKAlgorithm3D.cpp +++ b/src/CudaFDKAlgorithm3D.cpp @@ -32,6 +32,7 @@ $Id$  #include "astra/CudaProjector3D.h"  #include "astra/ConeProjectionGeometry3D.h" +#include "astra/CompositeGeometryManager.h"  #include "astra/Logging.h" @@ -81,6 +82,17 @@ bool CCudaFDKAlgorithm3D::_check()  	const CProjectionGeometry3D* projgeom = m_pSinogram->getGeometry();  	ASTRA_CONFIG_CHECK(dynamic_cast<const CConeProjectionGeometry3D*>(projgeom), "CUDA_FDK", "Error setting FDK geometry"); + +	const CVolumeGeometry3D* volgeom = m_pReconstruction->getGeometry(); +	bool cube = true; +	if (abs(volgeom->getPixelLengthX() / volgeom->getPixelLengthY() - 1.0) > 0.00001) +		cube = false; +	if (abs(volgeom->getPixelLengthX() / volgeom->getPixelLengthZ() - 1.0) > 0.00001) +		cube = false; +	ASTRA_CONFIG_CHECK(cube, "CUDA_FDK", "Voxels must be cubes for FDK"); + + +  	return true;  } @@ -195,6 +207,7 @@ void CCudaFDKAlgorithm3D::run(int _iNrIterations)  	ASTRA_ASSERT(pReconMem); +#if 0  	bool ok = true;  	ok = astraCudaFDK(pReconMem->getData(), pSinoMem->getDataConst(), @@ -202,6 +215,13 @@ void CCudaFDKAlgorithm3D::run(int _iNrIterations)  	                  m_bShortScan, m_iGPUIndex, m_iVoxelSuperSampling);  	ASTRA_ASSERT(ok); +#endif + +	CCompositeGeometryManager cgm; + +	cgm.doFDK(m_pProjector, pReconMem, pSinoMem, m_bShortScan); + +  }  //----------------------------------------------------------------------------------------  | 
