diff options
| -rw-r--r-- | cuda/3d/par3d_bp.cu | 20 | ||||
| -rw-r--r-- | cuda/3d/par3d_fp.cu | 22 | ||||
| -rw-r--r-- | cuda/3d/rounding.h | 9 | 
3 files changed, 49 insertions, 2 deletions
| diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu index 27d95fe..7958ac9 100644 --- a/cuda/3d/par3d_bp.cu +++ b/cuda/3d/par3d_bp.cu @@ -54,6 +54,8 @@ __constant__ DevPar3DParams gC_C[g_MaxAngles];  __constant__ float gC_scale[g_MaxAngles]; +#include "rounding.h" +  template<unsigned int ZSIZE>  __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, cudaTextureObject_t tex, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)  { @@ -100,10 +102,26 @@ __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, cudaTexture  			float fU = fCu.w + fX * fCu.x + fY * fCu.y + fZ * fCu.z;  			float fV = fCv.w + fX * fCv.x + fY * fCv.y + fZ * fCv.z; +//			printf("%f %f\n", fU, fV);  			for (int idx = 0; idx < ZSIZE; ++idx) { +				float fVal; +				textype h5 = texto(0.5f); +				textype fU_ = texto(fU); +				textype fUf_ = texto(floor(fU)); +				float fUf = floor(fU); +				 +				if ((fU - fUf) < 0.5f) { +				    textype fVal1 = texto(tex3D<float>(tex, fUf - 0.5f, fAngle, fV)); +				    textype fVal2 = texto(tex3D<float>(tex, fUf + 0.5f, fAngle, fV)); +				    fVal = texfrom(fVal1 + (fU_ + h5 - fUf_) * (fVal2 - fVal1)); +				} else { +				    textype fVal1 = texto(tex3D<float>(tex, fUf + 0.5f, fAngle, fV)); +				    textype fVal2 = texto(tex3D<float>(tex, fUf + 1.5f, fAngle, fV)); +				    fVal = texfrom(fVal1 + (fU_ - h5 - fUf_) * (fVal2 - fVal1)); +				} -				float fVal = tex3D<float>(tex, fU, fAngle, fV); +//				float fVal = tex3D<float>(tex, fU, fAngle, fV);  				Z[idx] += fVal * fS;  				fU += fCu.z; diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index fda6f93..075784b 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -146,6 +146,7 @@ bool transferConstants(const SPar3DProjection* angles, unsigned int iProjAngles)  // blockIdx:  x = u/v detector  //            y = angle block +#include "rounding.h"  template<class COORD, class SCALE>  __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, @@ -212,10 +213,28 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch,  		float f0 = startSlice + 0.5f;  		float f1 = a1 * (startSlice - 0.5f*c.nSlices(dims) + 0.5f) + b1 + 0.5f*c.nDim1(dims) - 0.5f + 0.5f;  		float f2 = a2 * (startSlice - 0.5f*c.nSlices(dims) + 0.5f) + b2 + 0.5f*c.nDim2(dims) - 0.5f + 0.5f; +		//printf("%f, %f (%f), %f (%f)\n", f0, f1, a1, f2, a2); // Only f1 non linear  		for (int s = startSlice; s < endSlice; ++s)  		{ -			fVal += c.tex(tex, f0, f1, f2); + +			textype h5 = texto(0.5f); +			textype f1_ = texto(f1); +			textype f1f_ = texto(floor(f1)); +			float f1f = floor(f1); +				 +			if ((f1 - f1f) < 0.5f) { +			    textype fVal1 = texto(c.tex(tex, f0, f1f - 0.5f, f2)); +			    textype fVal2 = texto(c.tex(tex, f0, f1f + 0.5f, f2)); +			    fVal += texfrom(fVal1 + (f1_ + h5 - f1f_) * (fVal2 - fVal1)); +//			    fVal += texfrom(__hfma(__hadd(h5,__hsub(f1_, f1f_)), __hsub(fVal2, fVal1), fVal1)); +			} else { +			    textype fVal1 = texto(c.tex(tex, f0, f1f + 0.5f, f2)); +			    textype fVal2 = texto(c.tex(tex, f0, f1f + 1.5f, f2)); +			    fVal += texfrom(fVal1 + (f1_ - h5 - f1f_) * (fVal2 - fVal1)); +			} + +//			fVal += c.tex(tex, f0, f1, f2);  			f0 += 1.0f;  			f1 += a1;  			f2 += a2; @@ -308,6 +327,7 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,  		for (int s = startSlice; s < endSlice; ++s)  		{  			fVal += c.tex(tex, f0, f1, f2); +  			f0 += 1.0f;  			f1 += a1;  			f2 += a2; diff --git a/cuda/3d/rounding.h b/cuda/3d/rounding.h new file mode 100644 index 0000000..a263f84 --- /dev/null +++ b/cuda/3d/rounding.h @@ -0,0 +1,9 @@ +#include <cuda_fp16.h> + +#define texto __float2half +#define texfrom __half2float +#define textype half + +//#define texto +//#define texfrom +//#define textype float | 
