From 3cf63d335ebe392a8c77f0c90395c18150647aeb Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Fri, 29 Mar 2019 21:21:29 +0100 Subject: Adjust adjoint to line integral scaling --- cuda/2d/algo.cu | 12 +++++------ cuda/2d/fan_bp.cu | 62 ++++++++++++++++++++++++++++++++++++++++--------------- cuda/2d/par_bp.cu | 20 +++++++++++++++--- 3 files changed, 68 insertions(+), 26 deletions(-) (limited to 'cuda/2d') diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu index b4c2864..11422ff 100644 --- a/cuda/2d/algo.cu +++ b/cuda/2d/algo.cu @@ -258,10 +258,10 @@ bool ReconAlgo::copyDataToGPU(const float* pfSinogram, unsigned int iSinogramPit if (!ok) return false; - // rescale sinogram to adjust for pixel size - processSino(D_sinoData, fSinogramScale, - //1.0f/(fPixelSize*fPixelSize), - sinoPitch, dims); + // rescale sinogram + if (fSinogramScale != 1.0f) + processSino(D_sinoData, fSinogramScale, + sinoPitch, dims); ok = copyVolumeToDevice(pfReconstruction, iReconstructionPitch, dims, @@ -331,11 +331,11 @@ bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch, if (parProjs) { assert(!fanProjs); return BP(D_volumeData, volumePitch, D_projData, projPitch, - dims, parProjs, outputScale); + dims, parProjs, fOutputScale * outputScale); } else { assert(fanProjs); return FanBP(D_volumeData, volumePitch, D_projData, projPitch, - dims, fanProjs, outputScale); + dims, fanProjs, fOutputScale * outputScale); } } diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu index dac3ac2..2072cd4 100644 --- a/cuda/2d/fan_bp.cu +++ b/cuda/2d/fan_bp.cu @@ -48,7 +48,7 @@ const unsigned int g_anglesPerBlock = 16; const unsigned int g_blockSliceSize = 32; const unsigned int g_blockSlices = 16; -const unsigned int g_MaxAngles = 2560; +const unsigned int g_MaxAngles = 2240; __constant__ float gC_SrcX[g_MaxAngles]; __constant__ float gC_SrcY[g_MaxAngles]; @@ -56,6 +56,7 @@ __constant__ float gC_DetSX[g_MaxAngles]; __constant__ float gC_DetSY[g_MaxAngles]; __constant__ float gC_DetUX[g_MaxAngles]; __constant__ float gC_DetUY[g_MaxAngles]; +__constant__ float gC_Scale[g_MaxAngles]; static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder) @@ -96,8 +97,6 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s float fVal = 0.0f; float fA = startAngle + 0.5f; - // TODO: Distance correction? - for (int angle = startAngle; angle < endAngle; ++angle) { const float fSrcX = gC_SrcX[angle]; @@ -106,15 +105,24 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s const float fDetSY = gC_DetSY[angle]; const float fDetUX = gC_DetUX[angle]; const float fDetUY = gC_DetUY[angle]; + const float fScale = gC_Scale[angle]; const float fXD = fSrcX - fX; const float fYD = fSrcY - fY; const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX; const float fDen = fDetUX * fYD - fDetUY * fXD; - - const float fT = fNum / fDen; - fVal += tex2D(gT_FanProjTexture, fT, fA); + + // fDen = || u (x-s) || (2x2 determinant) + + // Scale factor is the approximate number of rays traversing this pixel, + // given by the inverse size of a detector pixel scaled by the magnification + // factor of this pixel. + // Magnification factor is || u (d-s) || / || u (x-s) || + + const float fr = __fdividef(1.0f, fDen); + const float fT = fNum * fr; + fVal += tex2D(gT_FanProjTexture, fT, fA) * fScale * fr; fA += 1.0f; } @@ -148,8 +156,6 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in float fVal = 0.0f; float fA = startAngle + 0.5f; - // TODO: Distance correction? - for (int angle = startAngle; angle < endAngle; ++angle) { const float fSrcX = gC_SrcX[angle]; @@ -158,6 +164,7 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in const float fDetSY = gC_DetSY[angle]; const float fDetUX = gC_DetUX[angle]; const float fDetUY = gC_DetUY[angle]; + const float fScale = gC_Scale[angle]; // TODO: Optimize these loops... float fX = fXb; @@ -169,9 +176,10 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX; const float fDen = fDetUX * fYD - fDetUY * fXD; - - const float fT = fNum / fDen; - fVal += tex2D(gT_FanProjTexture, fT, fA); + const float fr = __fdividef(1.0f, fDen); + + const float fT = fNum * fr; + fVal += tex2D(gT_FanProjTexture, fT, fA) * fScale * fr; fY -= fSubStep; } fX += fSubStep; @@ -202,8 +210,6 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi float* volData = (float*)D_volData; - // TODO: Distance correction? - // TODO: Constant memory vs parameters. const float fSrcX = gC_SrcX[0]; const float fSrcY = gC_SrcY[0]; @@ -211,15 +217,17 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi const float fDetSY = gC_DetSY[0]; const float fDetUX = gC_DetUX[0]; const float fDetUY = gC_DetUY[0]; + const float fScale = gC_Scale[0]; const float fXD = fSrcX - fX; const float fYD = fSrcY - fY; const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX; const float fDen = fDetUX * fYD - fDetUY * fXD; - - const float fT = fNum / fDen; - const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f); + const float fr = __fdividef(1.0f, fDen); + + const float fT = fNum * fr; + const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f) * fScale * fr; volData[Y*volPitch+X] += fVal * fOutputScale; } @@ -248,7 +256,7 @@ __global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, un float fVal = 0.0f; float fA = startAngle + 0.5f; - // TODO: Distance correction? + // TODO: Update for new projection scaling for (int angle = startAngle; angle < endAngle; ++angle) { @@ -299,6 +307,13 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch, #undef TRANSFER_TO_CONSTANT + for (unsigned int i = 0; i < dims.iProjAngles; ++i) { + double detsize = sqrt(angles[i].fDetUX * angles[i].fDetUX + angles[i].fDetUY * angles[i].fDetUY); + double scale = (angles[i].fDetUX * (angles[i].fSrcY - angles[i].fDetSY) - angles[i].fDetUY * (angles[i].fSrcX - angles[i].fDetSX)) / detsize; + tmp[i] = (float)scale; + } + cudaMemcpyToSymbol(gC_Scale, tmp, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); + delete[] tmp; dim3 dimBlock(g_blockSlices, g_blockSliceSize); @@ -346,6 +361,14 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch, #undef TRANSFER_TO_CONSTANT + for (unsigned int i = 0; i < dims.iProjAngles; ++i) { + double detsize = sqrt(angles[i].fDetUX * angles[i].fDetUX + angles[i].fDetUY * angles[i].fDetUY); + double scale = (angles[i].fDetUX * (angles[i].fSrcY - angles[i].fDetSY) - angles[i].fDetUY * (angles[i].fSrcX - angles[i].fDetSX)) / detsize; + tmp[i] = (float)scale; + } + cudaMemcpyToSymbol(gC_Scale, tmp, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); + + delete[] tmp; dim3 dimBlock(g_blockSlices, g_blockSliceSize); @@ -389,6 +412,11 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch, #undef TRANSFER_TO_CONSTANT + double detsize = sqrt(angles[angle].fDetUX * angles[angle].fDetUX + angles[angle].fDetUY * angles[angle].fDetUY); + double scale = (angles[angle].fDetUX * (angles[angle].fSrcY - angles[angle].fDetSY) - angles[angle].fDetUY * (angles[angle].fSrcX - angles[angle].fDetSX)) / detsize; + float tmp = (float)scale; + cudaMemcpyToSymbol(gC_Scale, &tmp, sizeof(float), 0, cudaMemcpyHostToDevice); + dim3 dimBlock(g_blockSlices, g_blockSliceSize); dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices, (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize); diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu index 09a6554..21484b1 100644 --- a/cuda/2d/par_bp.cu +++ b/cuda/2d/par_bp.cu @@ -53,6 +53,7 @@ const unsigned int g_MaxAngles = 2560; __constant__ float gC_angle_scaled_sin[g_MaxAngles]; __constant__ float gC_angle_scaled_cos[g_MaxAngles]; __constant__ float gC_angle_offset[g_MaxAngles]; +__constant__ float gC_angle_scale[g_MaxAngles]; static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder) { @@ -70,6 +71,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi return true; } +// TODO: Templated version with/without scale? (Or only the global outputscale) __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; @@ -97,9 +99,10 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star const float scaled_cos_theta = gC_angle_scaled_cos[angle]; const float scaled_sin_theta = gC_angle_scaled_sin[angle]; const float TOffset = gC_angle_offset[angle]; + const float scale = gC_angle_scale[angle]; const float fT = fX * scaled_cos_theta - fY * scaled_sin_theta + TOffset; - fVal += tex2D(gT_projTexture, fT, fA); + fVal += tex2D(gT_projTexture, fT, fA) * scale; fA += 1.0f; } @@ -138,6 +141,7 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s const float cos_theta = gC_angle_scaled_cos[angle]; const float sin_theta = gC_angle_scaled_sin[angle]; const float TOffset = gC_angle_offset[angle]; + const float scale = gC_angle_scale[angle]; float fT = fX * cos_theta - fY * sin_theta + TOffset; @@ -145,7 +149,7 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s float fTy = fT; fT += fSubStep * cos_theta; for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) { - fVal += tex2D(gT_projTexture, fTy, fA); + fVal += tex2D(gT_projTexture, fTy, fA) * scale; fTy -= fSubStep * sin_theta; } } @@ -172,6 +176,8 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset const float fT = fX * angle_cos - fY * angle_sin + offset; const float fVal = tex2D(gT_projTexture, fT, 0.5f); + // NB: The 'scale' constant in devBP has been folded into fOutputScale by the caller here + D_volData[Y*volPitch+X] += fVal * fOutputScale; } @@ -186,27 +192,34 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch, float* angle_scaled_sin = new float[dims.iProjAngles]; float* angle_scaled_cos = new float[dims.iProjAngles]; float* angle_offset = new float[dims.iProjAngles]; + float* angle_scale = new float[dims.iProjAngles]; bindProjDataTexture(D_projData, projPitch, dims.iProjDets, dims.iProjAngles); for (unsigned int i = 0; i < dims.iProjAngles; ++i) { double d = angles[i].fDetUX * angles[i].fRayY - angles[i].fDetUY * angles[i].fRayX; angle_scaled_cos[i] = angles[i].fRayY / d; - angle_scaled_sin[i] = -angles[i].fRayX / d; // TODO: Check signs + angle_scaled_sin[i] = -angles[i].fRayX / d; angle_offset[i] = (angles[i].fDetSY * angles[i].fRayX - angles[i].fDetSX * angles[i].fRayY) / d; + angle_scale[i] = sqrt(angles[i].fRayX * angles[i].fRayX + angles[i].fRayY * angles[i].fRayY) / abs(d); } + //fprintf(stderr, "outputscale in BP_internal: %f, %f\n", fOutputScale, angle_scale[0]); + //fprintf(stderr, "ray in BP_internal: %f,%f (length %f)\n", angles[0].fRayX, angles[0].fRayY, sqrt(angles[0].fRayX * angles[0].fRayX + angles[0].fRayY * angles[0].fRayY)); cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_scaled_sin, angle_scaled_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_scaled_cos, angle_scaled_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); cudaError_t e3 = cudaMemcpyToSymbol(gC_angle_offset, angle_offset, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); + cudaError_t e4 = cudaMemcpyToSymbol(gC_angle_scale, angle_scale, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); assert(e1 == cudaSuccess); assert(e2 == cudaSuccess); assert(e3 == cudaSuccess); + assert(e4 == cudaSuccess); delete[] angle_scaled_sin; delete[] angle_scaled_cos; delete[] angle_offset; + delete[] angle_scale; dim3 dimBlock(g_blockSlices, g_blockSliceSize); dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices, @@ -267,6 +280,7 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch, float angle_scaled_cos = angles[angle].fRayY / d; float angle_scaled_sin = -angles[angle].fRayX / d; // TODO: Check signs float angle_offset = (angles[angle].fDetSY * angles[angle].fRayX - angles[angle].fDetSX * angles[angle].fRayY) / d; + fOutputScale *= sqrt(angles[angle].fRayX * angles[angle].fRayX + angles[angle].fRayY * angles[angle].fRayY) / abs(d); dim3 dimBlock(g_blockSlices, g_blockSliceSize); dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices, -- cgit v1.2.3