summaryrefslogtreecommitdiffstats
path: root/cuda
diff options
context:
space:
mode:
Diffstat (limited to 'cuda')
-rw-r--r--cuda/2d/algo.cu8
-rw-r--r--cuda/2d/astra.cu28
-rw-r--r--cuda/2d/cgls.cu14
-rw-r--r--cuda/2d/em.cu18
-rw-r--r--cuda/2d/sart.cu27
-rw-r--r--cuda/2d/sirt.cu26
-rw-r--r--cuda/2d/util.cu24
-rw-r--r--cuda/2d/util.h7
8 files changed, 92 insertions, 60 deletions
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu
index 71cbfb3..333481a 100644
--- a/cuda/2d/algo.cu
+++ b/cuda/2d/algo.cu
@@ -214,11 +214,11 @@ bool ReconAlgo::setMaxConstraint(float fMax)
bool ReconAlgo::allocateBuffers()
{
bool ok;
- ok = allocateVolume(D_volumeData, dims.iVolWidth, dims.iVolHeight, volumePitch);
+ ok = allocateVolumeData(D_volumeData, volumePitch, dims);
if (!ok)
return false;
- ok = allocateVolume(D_sinoData, dims.iProjDets, dims.iProjAngles, sinoPitch);
+ ok = allocateProjectionData(D_sinoData, sinoPitch, dims);
if (!ok) {
cudaFree(D_volumeData);
D_volumeData = 0;
@@ -226,7 +226,7 @@ bool ReconAlgo::allocateBuffers()
}
if (useVolumeMask) {
- ok = allocateVolume(D_maskData, dims.iVolWidth, dims.iVolHeight, maskPitch);
+ ok = allocateVolumeData(D_maskData, maskPitch, dims);
if (!ok) {
cudaFree(D_volumeData);
cudaFree(D_sinoData);
@@ -237,7 +237,7 @@ bool ReconAlgo::allocateBuffers()
}
if (useSinogramMask) {
- ok = allocateVolume(D_smaskData, dims.iProjDets, dims.iProjAngles, smaskPitch);
+ ok = allocateProjectionData(D_smaskData, smaskPitch, dims);
if (!ok) {
cudaFree(D_volumeData);
cudaFree(D_sinoData);
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu
index e317f6c..4e69e8f 100644
--- a/cuda/2d/astra.cu
+++ b/cuda/2d/astra.cu
@@ -240,13 +240,13 @@ bool AstraFBP::init(int iGPUIndex)
}
}
- bool ok = allocateVolume(pData->D_volumeData, pData->dims.iVolWidth, pData->dims.iVolHeight, pData->volumePitch);
+ bool ok = allocateVolumeData(pData->D_volumeData, pData->volumePitch, pData->dims);
if (!ok)
{
return false;
}
- ok = allocateVolume(pData->D_sinoData, pData->dims.iProjDets, pData->dims.iProjAngles, pData->sinoPitch);
+ ok = allocateProjectionData(pData->D_sinoData, pData->sinoPitch, pData->dims);
if (!ok)
{
cudaFree(pData->D_volumeData);
@@ -304,7 +304,7 @@ bool AstraFBP::run()
return false;
}
- zeroVolume(pData->D_volumeData, pData->volumePitch, pData->dims.iVolWidth, pData->dims.iVolHeight);
+ zeroVolumeData(pData->D_volumeData, pData->volumePitch, pData->dims);
bool ok = false;
@@ -604,7 +604,7 @@ bool BPalgo::init()
bool BPalgo::iterate(unsigned int)
{
// TODO: This zeroVolume makes an earlier memcpy of D_volumeData redundant
- zeroVolume(D_volumeData, volumePitch, dims.iVolWidth, dims.iVolHeight);
+ zeroVolumeData(D_volumeData, volumePitch, dims);
callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch);
return true;
}
@@ -614,7 +614,7 @@ float BPalgo::computeDiffNorm()
float *D_projData;
unsigned int projPitch;
- allocateVolume(D_projData, dims.iProjDets, dims.iProjAngles, projPitch);
+ allocateProjectionData(D_projData, projPitch, dims);
cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*dims.iProjDets, dims.iProjAngles, cudaMemcpyDeviceToDevice);
callFP(D_volumeData, volumePitch, D_projData, projPitch, -1.0f);
@@ -668,14 +668,14 @@ bool astraCudaFP(const float* pfVolume, float* pfSinogram,
float* D_volumeData;
unsigned int volumePitch;
- ok = allocateVolume(D_volumeData, dims.iVolWidth, dims.iVolHeight, volumePitch);
+ ok = allocateVolumeData(D_volumeData, volumePitch, dims);
if (!ok)
return false;
float* D_sinoData;
unsigned int sinoPitch;
- ok = allocateVolume(D_sinoData, dims.iProjDets, dims.iProjAngles, sinoPitch);
+ ok = allocateProjectionData(D_sinoData, sinoPitch, dims);
if (!ok) {
cudaFree(D_volumeData);
return false;
@@ -690,7 +690,7 @@ bool astraCudaFP(const float* pfVolume, float* pfSinogram,
return false;
}
- zeroVolume(D_sinoData, sinoPitch, dims.iProjDets, dims.iProjAngles);
+ zeroProjectionData(D_sinoData, sinoPitch, dims);
ok = FP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, pfAngles, pfOffsets, 1.0f);
if (!ok) {
cudaFree(D_volumeData);
@@ -755,14 +755,14 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,
float* D_volumeData;
unsigned int volumePitch;
- ok = allocateVolume(D_volumeData, dims.iVolWidth, dims.iVolHeight, volumePitch);
+ ok = allocateVolumeData(D_volumeData, volumePitch, dims);
if (!ok)
return false;
float* D_sinoData;
unsigned int sinoPitch;
- ok = allocateVolume(D_sinoData, dims.iProjDets, dims.iProjAngles, sinoPitch);
+ ok = allocateProjectionData(D_sinoData, sinoPitch, dims);
if (!ok) {
cudaFree(D_volumeData);
return false;
@@ -777,7 +777,7 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,
return false;
}
- zeroVolume(D_sinoData, sinoPitch, dims.iProjDets, dims.iProjAngles);
+ zeroProjectionData(D_sinoData, sinoPitch, dims);
// TODO: Turn this geometry conversion into a util function
SFanProjection* projs = new SFanProjection[dims.iProjAngles];
@@ -866,14 +866,14 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,
float* D_volumeData;
unsigned int volumePitch;
- ok = allocateVolume(D_volumeData, dims.iVolWidth, dims.iVolHeight, volumePitch);
+ ok = allocateVolumeData(D_volumeData, volumePitch, dims);
if (!ok)
return false;
float* D_sinoData;
unsigned int sinoPitch;
- ok = allocateVolume(D_sinoData, dims.iProjDets, dims.iProjAngles, sinoPitch);
+ ok = allocateProjectionData(D_sinoData, sinoPitch, dims);
if (!ok) {
cudaFree(D_volumeData);
return false;
@@ -888,7 +888,7 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,
return false;
}
- zeroVolume(D_sinoData, sinoPitch, dims.iProjDets, dims.iProjAngles);
+ zeroProjectionData(D_sinoData, sinoPitch, dims);
ok = FanFP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, pAngles, 1.0f);
diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu
index df8db0b..f4175e1 100644
--- a/cuda/2d/cgls.cu
+++ b/cuda/2d/cgls.cu
@@ -73,16 +73,16 @@ void CGLS::reset()
bool CGLS::init()
{
// Lifetime of z: within an iteration
- allocateVolume(D_z, dims.iVolWidth, dims.iVolHeight, zPitch);
+ allocateVolumeData(D_z, zPitch, dims);
// Lifetime of p: full algorithm
- allocateVolume(D_p, dims.iVolWidth, dims.iVolHeight, pPitch);
+ allocateVolumeData(D_p, pPitch, dims);
// Lifetime of r: full algorithm
- allocateVolume(D_r, dims.iProjDets, dims.iProjAngles, rPitch);
+ allocateProjectionData(D_r, rPitch, dims);
// Lifetime of w: within an iteration
- allocateVolume(D_w, dims.iProjDets, dims.iProjAngles, wPitch);
+ allocateProjectionData(D_w, wPitch, dims);
// TODO: check if allocations succeeded
return true;
@@ -134,7 +134,7 @@ bool CGLS::iterate(unsigned int iterations)
// p = A'*r
- zeroVolume(D_p, pPitch, dims.iVolWidth, dims.iVolHeight);
+ zeroVolumeData(D_p, pPitch, dims);
callBP(D_p, pPitch, D_r, rPitch);
if (useVolumeMask)
processVol<opMul>(D_p, D_maskData, pPitch, dims.iVolWidth, dims.iVolHeight);
@@ -150,7 +150,7 @@ bool CGLS::iterate(unsigned int iterations)
for (unsigned int iter = 0; iter < iterations && !shouldAbort; ++iter) {
// w = A*p
- zeroVolume(D_w, wPitch, dims.iProjDets, dims.iProjAngles);
+ zeroProjectionData(D_w, wPitch, dims);
callFP(D_p, pPitch, D_w, wPitch, 1.0f);
// alpha = gamma / <w,w>
@@ -165,7 +165,7 @@ bool CGLS::iterate(unsigned int iterations)
// z = A'*r
- zeroVolume(D_z, zPitch, dims.iVolWidth, dims.iVolHeight);
+ zeroVolumeData(D_z, zPitch, dims);
callBP(D_z, zPitch, D_r, rPitch);
if (useVolumeMask)
processVol<opMul>(D_z, D_maskData, zPitch, dims.iVolWidth, dims.iVolHeight);
diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu
index cf3f10c..b281516 100644
--- a/cuda/2d/em.cu
+++ b/cuda/2d/em.cu
@@ -73,14 +73,14 @@ void EM::reset()
bool EM::init()
{
- allocateVolume(D_pixelWeight, dims.iVolWidth, dims.iVolHeight, pixelPitch);
- zeroVolume(D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight);
+ allocateVolumeData(D_pixelWeight, pixelPitch, dims);
+ zeroVolumeData(D_pixelWeight, pixelPitch, dims);
- allocateVolume(D_tmpData, dims.iVolWidth, dims.iVolHeight, tmpPitch);
- zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ allocateVolumeData(D_tmpData, tmpPitch, dims);
+ zeroVolumeData(D_tmpData, tmpPitch, dims);
- allocateVolume(D_projData, dims.iProjDets, dims.iProjAngles, projPitch);
- zeroVolume(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);
+ allocateProjectionData(D_projData, projPitch, dims);
+ zeroProjectionData(D_projData, projPitch, dims);
// We can't precompute pixelWeights when using a volume mask
#if 0
@@ -94,7 +94,7 @@ bool EM::init()
bool EM::precomputeWeights()
{
- zeroVolume(D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight);
+ zeroVolumeData(D_pixelWeight, pixelPitch, dims);
#if 0
if (useSinogramMask) {
callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch);
@@ -129,14 +129,14 @@ bool EM::iterate(unsigned int iterations)
for (unsigned int iter = 0; iter < iterations && !shouldAbort; ++iter) {
// Do FP of volumeData
- zeroVolume(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);
+ zeroProjectionData(D_projData, projPitch, dims);
callFP(D_volumeData, volumePitch, D_projData, projPitch, 1.0f);
// Divide sinogram by FP (into projData)
processVol<opDividedBy>(D_projData, D_sinoData, projPitch, dims.iProjDets, dims.iProjAngles);
// Do BP of projData into tmpData
- zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ zeroVolumeData(D_tmpData, tmpPitch, dims);
callBP(D_tmpData, tmpPitch, D_projData, projPitch);
// Multiply volumeData with tmpData divided by pixel weights
diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu
index 7f499ce..79c00ef 100644
--- a/cuda/2d/sart.cu
+++ b/cuda/2d/sart.cu
@@ -105,15 +105,18 @@ void SART::reset()
bool SART::init()
{
if (useVolumeMask) {
- allocateVolume(D_tmpData, dims.iVolWidth, dims.iVolHeight, tmpPitch);
- zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ allocateVolumeData(D_tmpData, tmpPitch, dims);
+ zeroVolumeData(D_tmpData, tmpPitch, dims);
}
- allocateVolume(D_projData, dims.iProjDets, 1, projPitch);
- zeroVolume(D_projData, projPitch, dims.iProjDets, 1);
+ // NB: Non-standard dimensions
+ SDimensions linedims = dims;
+ linedims.iProjAngles = 1;
+ allocateProjectionData(D_projData, projPitch, linedims);
+ zeroProjectionData(D_projData, projPitch, linedims);
- allocateVolume(D_lineWeight, dims.iProjDets, dims.iProjAngles, linePitch);
- zeroVolume(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles);
+ allocateProjectionData(D_lineWeight, linePitch, dims);
+ zeroProjectionData(D_lineWeight, linePitch, dims);
// We can't precompute lineWeights when using a mask
if (!useVolumeMask)
@@ -138,13 +141,13 @@ bool SART::setProjectionOrder(int* _projectionOrder, int _projectionCount)
bool SART::precomputeWeights()
{
- zeroVolume(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles);
+ zeroProjectionData(D_lineWeight, linePitch, dims);
if (useVolumeMask) {
callFP(D_maskData, maskPitch, D_lineWeight, linePitch, 1.0f);
} else {
// Allocate tmpData temporarily
- allocateVolume(D_tmpData, dims.iVolWidth, dims.iVolHeight, tmpPitch);
- zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ allocateVolumeData(D_tmpData, tmpPitch, dims);
+ zeroVolumeData(D_tmpData, tmpPitch, dims);
processVol<opSet>(D_tmpData, 1.0f, tmpPitch, dims.iVolWidth, dims.iVolHeight);
@@ -193,7 +196,7 @@ bool SART::iterate(unsigned int iterations)
if (useVolumeMask) {
// BP, mask, and add back
// TODO: Try putting the masking directly in the BP
- zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ zeroVolumeData(D_tmpData, tmpPitch, dims);
callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle);
processVol<opAddMul>(D_volumeData, D_maskData, D_tmpData, volumePitch, dims.iVolWidth, dims.iVolHeight);
} else {
@@ -216,8 +219,8 @@ float SART::computeDiffNorm()
{
unsigned int pPitch;
float *D_p;
- allocateVolume(D_p, dims.iProjDets, dims.iProjAngles, pPitch);
- zeroVolume(D_p, pPitch, dims.iProjDets, dims.iProjAngles);
+ allocateProjectionData(D_p, pPitch, dims);
+ zeroProjectionData(D_p, pPitch, dims);
// copy sinogram to D_p
cudaMemcpy2D(D_p, sizeof(float)*pPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice);
diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu
index eb65962..1b0891a 100644
--- a/cuda/2d/sirt.cu
+++ b/cuda/2d/sirt.cu
@@ -88,17 +88,17 @@ void SIRT::reset()
bool SIRT::init()
{
- allocateVolume(D_pixelWeight, dims.iVolWidth, dims.iVolHeight, pixelPitch);
- zeroVolume(D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight);
+ allocateVolumeData(D_pixelWeight, pixelPitch, dims);
+ zeroVolumeData(D_pixelWeight, pixelPitch, dims);
- allocateVolume(D_tmpData, dims.iVolWidth, dims.iVolHeight, tmpPitch);
- zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ allocateVolumeData(D_tmpData, tmpPitch, dims);
+ zeroVolumeData(D_tmpData, tmpPitch, dims);
- allocateVolume(D_projData, dims.iProjDets, dims.iProjAngles, projPitch);
- zeroVolume(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);
+ allocateProjectionData(D_projData, projPitch, dims);
+ zeroProjectionData(D_projData, projPitch, dims);
- allocateVolume(D_lineWeight, dims.iProjDets, dims.iProjAngles, linePitch);
- zeroVolume(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles);
+ allocateProjectionData(D_lineWeight, linePitch, dims);
+ zeroProjectionData(D_lineWeight, linePitch, dims);
// We can't precompute lineWeights and pixelWeights when using a mask
if (!useVolumeMask && !useSinogramMask)
@@ -110,7 +110,7 @@ bool SIRT::init()
bool SIRT::precomputeWeights()
{
- zeroVolume(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles);
+ zeroProjectionData(D_lineWeight, linePitch, dims);
if (useVolumeMask) {
callFP(D_maskData, maskPitch, D_lineWeight, linePitch, 1.0f);
} else {
@@ -125,7 +125,7 @@ bool SIRT::precomputeWeights()
}
- zeroVolume(D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight);
+ zeroVolumeData(D_pixelWeight, pixelPitch, dims);
if (useSinogramMask) {
callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch);
} else {
@@ -160,7 +160,7 @@ bool SIRT::uploadMinMaxMasks(const float* pfMinMaskData, const float* pfMaxMaskD
freeMinMaxMasks = true;
bool ok = true;
if (pfMinMaskData) {
- allocateVolume(D_minMaskData, dims.iVolWidth, dims.iVolHeight, minMaskPitch);
+ allocateVolumeData(D_minMaskData, minMaskPitch, dims);
ok = copyVolumeToDevice(pfMinMaskData, iPitch,
dims.iVolWidth, dims.iVolHeight,
D_minMaskData, minMaskPitch);
@@ -169,7 +169,7 @@ bool SIRT::uploadMinMaxMasks(const float* pfMinMaskData, const float* pfMaxMaskD
return false;
if (pfMaxMaskData) {
- allocateVolume(D_maxMaskData, dims.iVolWidth, dims.iVolHeight, maxMaskPitch);
+ allocateVolumeData(D_maxMaskData, maxMaskPitch, dims);
ok = copyVolumeToDevice(pfMaxMaskData, iPitch,
dims.iVolWidth, dims.iVolHeight,
D_maxMaskData, maxMaskPitch);
@@ -204,7 +204,7 @@ bool SIRT::iterate(unsigned int iterations)
processVol<opMul>(D_projData, D_lineWeight, projPitch, dims.iProjDets, dims.iProjAngles);
- zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ zeroVolumeData(D_tmpData, tmpPitch, dims);
callBP(D_tmpData, tmpPitch, D_projData, projPitch);
diff --git a/cuda/2d/util.cu b/cuda/2d/util.cu
index 8bb2f2f..d5cbe44 100644
--- a/cuda/2d/util.cu
+++ b/cuda/2d/util.cu
@@ -97,6 +97,30 @@ void zeroVolume(float* data, unsigned int pitch, unsigned int width, unsigned in
ASTRA_CUDA_ASSERT(err);
}
+bool allocateVolumeData(float*& D_ptr, unsigned int& pitch, const SDimensions& dims)
+{
+ // TODO: memory order
+ return allocateVolume(D_ptr, dims.iVolWidth, dims.iVolHeight, pitch);
+}
+
+bool allocateProjectionData(float*& D_ptr, unsigned int& pitch, const SDimensions& dims)
+{
+ // TODO: memory order
+ return allocateVolume(D_ptr, dims.iProjDets, dims.iProjAngles, pitch);
+}
+
+void zeroVolumeData(float* D_ptr, unsigned int pitch, const SDimensions& dims)
+{
+ // TODO: memory order
+ zeroVolume(D_ptr, pitch, dims.iVolWidth, dims.iVolHeight);
+}
+
+void zeroProjectionData(float* D_ptr, unsigned int pitch, const SDimensions& dims)
+{
+ // TODO: memory order
+ zeroVolume(D_ptr, pitch, dims.iProjDets, dims.iProjAngles);
+}
+
template <unsigned int blockSize>
__global__ void reduce1D(float *g_idata, float *g_odata, unsigned int n)
diff --git a/cuda/2d/util.h b/cuda/2d/util.h
index ed4a45c..3cffa08 100644
--- a/cuda/2d/util.h
+++ b/cuda/2d/util.h
@@ -73,9 +73,14 @@ bool copySinogramToDevice(const float* in_data, unsigned int in_pitch,
float* outD_data, unsigned int out_pitch);
bool allocateVolume(float*& D_ptr, unsigned int width, unsigned int height, unsigned int& pitch);
-
void zeroVolume(float* D_data, unsigned int pitch, unsigned int width, unsigned int height);
+bool allocateVolumeData(float*& D_ptr, unsigned int& pitch, const SDimensions& dims);
+bool allocateProjectionData(float*& D_ptr, unsigned int& pitch, const SDimensions& dims);
+void zeroVolumeData(float* D_ptr, unsigned int pitch, const SDimensions& dims);
+void zeroProjectionData(float* D_ptr, unsigned int pitch, const SDimensions& dims);
+
+
bool cudaTextForceKernelsCompletion();
void reportCudaError(cudaError_t err);