Remove padding in 2D cuda in favour of Border mode

author: Willem Jan Palenstijn <WillemJan.Palenstijn@uantwerpen.be> 2014-04-16 11:12:55 +0000
committer: wpalenst <WillemJan.Palenstijn@uantwerpen.be> 2014-04-16 11:12:55 +0000
commit: 3a6769465bee7d56d0ddff36613b886446421e07 (patch)
tree: 624e85c5d6a4ab19c958a388e3436219693a6296 /cuda/2d/sart.cu
parent: 4dfb881ceb82b07630437e952dec62323977ab56 (diff)
download: astra-3a6769465bee7d56d0ddff36613b886446421e07.tar.gz
astra-3a6769465bee7d56d0ddff36613b886446421e07.tar.bz2
astra-3a6769465bee7d56d0ddff36613b886446421e07.tar.xz
astra-3a6769465bee7d56d0ddff36613b886446421e07.zip
1 files changed, 28 insertions, 32 deletions
diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu
index a40176d..7f499ce 100644
--- a/cuda/2d/sart.cu
+++ b/cuda/2d/sart.cu
@@ -39,14 +39,13 @@ $Id$
 
 namespace astraCUDA {
 
-
+// FIXME: Remove these functions. (Outdated)
 __global__ void devMUL_SART(float* pfOut, const float* pfIn, unsigned int pitch, unsigned int width)
 {
 	unsigned int x = threadIdx.x + 16*blockIdx.x;
 	if (x >= width) return;
 
-	// Copy result down and left one pixel.
-	pfOut[x + pitch] = pfOut[x + 1] * pfIn[x + 1];
+	pfOut[x] *= pfIn[x];
 }
 
 void MUL_SART(float* pfOut, const float* pfIn, unsigned int pitch, unsigned int width)
@@ -106,18 +105,15 @@ void SART::reset()
 bool SART::init()
 {
 	if (useVolumeMask) {
-		allocateVolume(D_tmpData, dims.iVolWidth+2, dims.iVolHeight+2, tmpPitch);
-		zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth+2, dims.iVolHeight+2);
+		allocateVolume(D_tmpData, dims.iVolWidth, dims.iVolHeight, tmpPitch);
+		zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
 	}
 
-	// HACK: D_projData consists of two lines. The first is used padded,
-	// the second unpadded. This is to satisfy the alignment requirements
-	// of resp. FP and BP_SART.
-	allocateVolume(D_projData, dims.iProjDets+2, 2, projPitch);
-	zeroVolume(D_projData, projPitch, dims.iProjDets+2, 1);
+	allocateVolume(D_projData, dims.iProjDets, 1, projPitch);
+	zeroVolume(D_projData, projPitch, dims.iProjDets, 1);
 	
-	allocateVolume(D_lineWeight, dims.iProjDets+2, dims.iProjAngles, linePitch);
-	zeroVolume(D_lineWeight, linePitch, dims.iProjDets+2, dims.iProjAngles);
+	allocateVolume(D_lineWeight, dims.iProjDets, dims.iProjAngles, linePitch);
+	zeroVolume(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles);
 
 	// We can't precompute lineWeights when using a mask
 	if (!useVolumeMask)
@@ -142,23 +138,23 @@ bool SART::setProjectionOrder(int* _projectionOrder, int _projectionCount)
 
 bool SART::precomputeWeights()
 {
-	zeroVolume(D_lineWeight, linePitch, dims.iProjDets+2, dims.iProjAngles);
+	zeroVolume(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles);
 	if (useVolumeMask) {
 		callFP(D_maskData, maskPitch, D_lineWeight, linePitch, 1.0f);
 	} else {
 		// Allocate tmpData temporarily
-		allocateVolume(D_tmpData, dims.iVolWidth+2, dims.iVolHeight+2, tmpPitch);
-		zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth+2, dims.iVolHeight+2);
+		allocateVolume(D_tmpData, dims.iVolWidth, dims.iVolHeight, tmpPitch);
+		zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
 
 
-		processVol<opSet, VOL>(D_tmpData, 1.0f, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+		processVol<opSet>(D_tmpData, 1.0f, tmpPitch, dims.iVolWidth, dims.iVolHeight);
 		callFP(D_tmpData, tmpPitch, D_lineWeight, linePitch, 1.0f);
 
 
 		cudaFree(D_tmpData);
 		D_tmpData = 0;
 	}
-	processVol<opInvert, SINO>(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles);
+	processVol<opInvert>(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles);
 
 	return true;
 }
@@ -181,12 +177,12 @@ bool SART::iterate(unsigned int iterations)
 		}
 
 		// copy one line of sinogram to projection data
-		cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData + angle*sinoPitch, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets+2), 1, cudaMemcpyDeviceToDevice);
+		cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData + angle*sinoPitch, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), 1, cudaMemcpyDeviceToDevice);
 
 		// do FP, subtracting projection from sinogram
 		if (useVolumeMask) {
-				cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth+2), dims.iVolHeight+2, cudaMemcpyDeviceToDevice);
-				processVol<opMul, VOL>(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+				cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
+				processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
 				callFP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, -1.0f);
 		} else {
 				callFP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, -1.0f);
@@ -197,17 +193,17 @@ bool SART::iterate(unsigned int iterations)
 		if (useVolumeMask) {
 			// BP, mask, and add back
 			// TODO: Try putting the masking directly in the BP
-			zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth+2, dims.iVolHeight+2);
+			zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
 			callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle);
-			processVol<opAddMul, VOL>(D_volumeData, D_maskData, D_tmpData, volumePitch, dims.iVolWidth, dims.iVolHeight);
+			processVol<opAddMul>(D_volumeData, D_maskData, D_tmpData, volumePitch, dims.iVolWidth, dims.iVolHeight);
 		} else {
 			callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle);
 		}
 
 		if (useMinConstraint)
-			processVol<opClampMin, VOL>(D_volumeData, fMinConstraint, volumePitch, dims.iVolWidth, dims.iVolHeight);
+			processVol<opClampMin>(D_volumeData, fMinConstraint, volumePitch, dims.iVolWidth, dims.iVolHeight);
 		if (useMaxConstraint)
-			processVol<opClampMax, VOL>(D_volumeData, fMaxConstraint, volumePitch, dims.iVolWidth, dims.iVolHeight);
+			processVol<opClampMax>(D_volumeData, fMaxConstraint, volumePitch, dims.iVolWidth, dims.iVolHeight);
 
 		iteration++;
 
@@ -220,16 +216,16 @@ float SART::computeDiffNorm()
 {
 	unsigned int pPitch;
 	float *D_p;
-	allocateVolume(D_p, dims.iProjDets+2, dims.iProjAngles, pPitch);
-	zeroVolume(D_p, pPitch, dims.iProjDets+2, dims.iProjAngles);
+	allocateVolume(D_p, dims.iProjDets, dims.iProjAngles, pPitch);
+	zeroVolume(D_p, pPitch, dims.iProjDets, dims.iProjAngles);
 
 	// copy sinogram to D_p
-	cudaMemcpy2D(D_p, sizeof(float)*pPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets+2), dims.iProjAngles, cudaMemcpyDeviceToDevice);
+	cudaMemcpy2D(D_p, sizeof(float)*pPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice);
 
 	// do FP, subtracting projection from sinogram
 	if (useVolumeMask) {
-			cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth+2), dims.iVolHeight+2, cudaMemcpyDeviceToDevice);
-			processVol<opMul, VOL>(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+			cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
+			processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
 			callFP(D_tmpData, tmpPitch, D_projData, projPitch, -1.0f);
 	} else {
 			callFP(D_volumeData, volumePitch, D_projData, projPitch, -1.0f);
@@ -237,7 +233,7 @@ float SART::computeDiffNorm()
 
 
 	// compute norm of D_p
-	float s = dotProduct2D(D_p, pPitch, dims.iProjDets, dims.iProjAngles, 1, 0);
+	float s = dotProduct2D(D_p, pPitch, dims.iProjDets, dims.iProjAngles);
 
 	cudaFree(D_p);
 
@@ -267,11 +263,11 @@ bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch,
 {
 	if (angles) {
 		assert(!fanProjs);
-		return BP_SART(D_volumeData, volumePitch, D_projData + projPitch, projPitch,
+		return BP_SART(D_volumeData, volumePitch, D_projData, projPitch,
 		               angle, dims, angles, TOffsets);
 	} else {
 		assert(fanProjs);
-		return FanBP_SART(D_volumeData, volumePitch, D_projData + projPitch, projPitch,
+		return FanBP_SART(D_volumeData, volumePitch, D_projData, projPitch,
 		                  angle, dims, fanProjs);
 	}
author	Willem Jan Palenstijn <WillemJan.Palenstijn@uantwerpen.be>	2014-04-16 11:12:55 +0000
committer	wpalenst <WillemJan.Palenstijn@uantwerpen.be>	2014-04-16 11:12:55 +0000
commit	3a6769465bee7d56d0ddff36613b886446421e07 (patch)
tree	624e85c5d6a4ab19c958a388e3436219693a6296 /cuda/2d/sart.cu
parent	4dfb881ceb82b07630437e952dec62323977ab56 (diff)
download	astra-3a6769465bee7d56d0ddff36613b886446421e07.tar.gz astra-3a6769465bee7d56d0ddff36613b886446421e07.tar.bz2 astra-3a6769465bee7d56d0ddff36613b886446421e07.tar.xz astra-3a6769465bee7d56d0ddff36613b886446421e07.zip