From d85a660f064e8130b27e11c7fd762221c754c315 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Thu, 26 Jan 2017 14:57:57 +0100
Subject: Start work on CFloat32Data3DGPU to allow persistent/external GPU
 memory

---
 src/CompositeGeometryManager.cpp         | 260 ++++++++++++++++++++++++++-----
 src/CudaBackProjectionAlgorithm3D.cpp    |  20 ++-
 src/CudaFDKAlgorithm3D.cpp               |  12 +-
 src/CudaForwardProjectionAlgorithm3D.cpp |   8 +-
 src/Float32Data3D.cpp                    |   6 +-
 src/Float32Data3DGPU.cpp                 |  98 ++++++++++++
 src/Float32Data3DMemory.cpp              |   2 +-
 src/Float32ProjectionData3DGPU.cpp       |  71 +++++++++
 src/Float32ProjectionData3DMemory.cpp    |   2 +-
 src/Float32VolumeData3DGPU.cpp           |  71 +++++++++
 src/Float32VolumeData3DMemory.cpp        |   2 +-
 11 files changed, 486 insertions(+), 66 deletions(-)
 create mode 100644 src/Float32Data3DGPU.cpp
 create mode 100644 src/Float32ProjectionData3DGPU.cpp
 create mode 100644 src/Float32VolumeData3DGPU.cpp

(limited to 'src')

diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp
index c3af228..74466db 100644
--- a/src/CompositeGeometryManager.cpp
+++ b/src/CompositeGeometryManager.cpp
@@ -39,6 +39,8 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
 #include "astra/CudaProjector3D.h"
 #include "astra/Float32ProjectionData3DMemory.h"
 #include "astra/Float32VolumeData3DMemory.h"
+#include "astra/Float32ProjectionData3DGPU.h"
+#include "astra/Float32VolumeData3DGPU.h"
 #include "astra/Logging.h"
 
 #include "../cuda/3d/mem3d.h"
@@ -97,6 +99,127 @@ CCompositeGeometryManager::CCompositeGeometryManager()
 //   (First approach: 0.5/0.5)
 
 
+
+
+
+class _AstraExport CFloat32CustomGPUMemory { // Abstract interface: GPU-side storage used for one data object while a job runs
+public:
+    astraCUDA3d::MemHandle3D hnd; // Only required to be valid between allocate/free
+    virtual bool allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, astraCUDA3d::Mem3DZeroMode zero)=0; // make hnd usable for an x*y*z block; returns false on failure
+    virtual bool copyToGPUMemory(const astraCUDA3d::SSubDimensions3D &pos)=0; // bring the data described by pos into hnd (an implementation may have nothing to copy)
+    virtual bool copyFromGPUMemory(const astraCUDA3d::SSubDimensions3D &pos)=0; // write the contents of hnd back to the region described by pos
+    virtual bool freeGPUMemory()=0; // counterpart of allocateGPUMemory()
+	virtual ~CFloat32CustomGPUMemory() { }
+};
+
+class CFloat32ExistingGPUMemory : public astra::CFloat32CustomGPUMemory { // Handler for data objects that already carry a GPU handle (CFloat32Data3DGPU)
+public:
+    CFloat32ExistingGPUMemory(CFloat32Data3DGPU *d); // takes the handle and the dimensions from d
+    virtual bool allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, astraCUDA3d::Mem3DZeroMode zero); // checks the size; zeroes the memory on INIT_ZERO
+    virtual bool copyToGPUMemory(const astraCUDA3d::SSubDimensions3D &pos); // checks pos only, copies nothing
+    virtual bool copyFromGPUMemory(const astraCUDA3d::SSubDimensions3D &pos); // checks pos only, copies nothing
+    virtual bool freeGPUMemory(); // always returns true without freeing anything
+
+protected:
+    unsigned int x, y, z; // dimensions read from the wrapped data object in the constructor
+};
+
+class CFloat32DefaultGPUMemory : public astra::CFloat32CustomGPUMemory {
+public:
+	// Remember the host buffer of d; it is the source/target of all copies below.
+	CFloat32DefaultGPUMemory(CFloat32Data3DMemory* d) : ptr(d->getData()) { }
+
+	virtual bool allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, astraCUDA3d::Mem3DZeroMode zero) {
+		hnd = astraCUDA3d::allocateGPUMemory(x, y, z, zero);
+		return static_cast<bool>(hnd);
+	}
+	virtual bool copyToGPUMemory(const astraCUDA3d::SSubDimensions3D &pos) {
+		return astraCUDA3d::copyToGPUMemory(ptr, hnd, pos);
+	}
+	virtual bool copyFromGPUMemory(const astraCUDA3d::SSubDimensions3D &pos) {
+		return astraCUDA3d::copyFromGPUMemory(ptr, hnd, pos);
+	}
+	virtual bool freeGPUMemory() {
+		return astraCUDA3d::freeGPUMemory(hnd);
+	}
+
+protected:
+	float *ptr; // host-side buffer obtained from the wrapped CFloat32Data3DMemory
+};
+
+
+
+CFloat32ExistingGPUMemory::CFloat32ExistingGPUMemory(CFloat32Data3DGPU *d)
+	: x(d->getWidth()), y(d->getHeight()), z(d->getDepth())
+{
+	// Adopt the handle that d already holds instead of allocating a new one;
+	// the dimensions are kept so later requests can be checked against them.
+	hnd = d->getHandle();
+}
+
+bool CFloat32ExistingGPUMemory::allocateGPUMemory(unsigned int x_, unsigned int y_, unsigned int z_, astraCUDA3d::Mem3DZeroMode zero) {
+    // Nothing is allocated here: the requested size has to match the
+    // dimensions recorded in the constructor, and only zeroing may remain.
+    assert(x_ == x);
+    assert(y_ == y);
+    assert(z_ == z);
+    if (zero != astraCUDA3d::INIT_ZERO)
+        return true;
+    return astraCUDA3d::zeroGPUMemory(hnd, x, y, z);
+}
+bool CFloat32ExistingGPUMemory::copyToGPUMemory(const astraCUDA3d::SSubDimensions3D &pos) { // copies nothing; only validates pos
+    assert(pos.nx == x);
+    assert(pos.ny == y);
+    assert(pos.nz == z);
+    assert(pos.pitch == x);
+    assert(pos.subx == 0);
+    assert(pos.suby == 0);
+    assert(pos.subnx == x);
+    assert(pos.subny == y);
+    // Together the asserts require pos to describe the complete volume, with pitch equal to the width.
+    // These are less necessary than x/y, but allowing access to
+    // subvolumes needs an interface change
+    assert(pos.subz == 0);
+    assert(pos.subnz == z);
+
+    return true;
+}
+bool CFloat32ExistingGPUMemory::copyFromGPUMemory(const astraCUDA3d::SSubDimensions3D &pos) { // copies nothing; only validates pos
+    assert(pos.nx == x);
+    assert(pos.ny == y);
+    assert(pos.nz == z);
+    assert(pos.pitch == x);
+    assert(pos.subx == 0);
+    assert(pos.suby == 0);
+    assert(pos.subnx == x);
+    assert(pos.subny == y);
+    // Together the asserts require pos to describe the complete volume, with pitch equal to the width.
+    // These are less necessary than x/y, but allowing access to
+    // subvolumes needs an interface change
+    assert(pos.subz == 0);
+    assert(pos.subnz == z);
+
+    return true;
+}
+bool CFloat32ExistingGPUMemory::freeGPUMemory() { // counterpart of allocateGPUMemory(), which allocated nothing
+    return true; // the handle came from an existing CFloat32Data3DGPU, so nothing is released here
+}
+
+
+// Create the GPU memory handler matching d's storage (host memory or existing GPU handle); the caller deletes it.
+CFloat32CustomGPUMemory * createGPUMemoryHandler(CFloat32Data3D *d) {
+	if (CFloat32Data3DMemory *dMem = dynamic_cast<CFloat32Data3DMemory*>(d))
+		return new CFloat32DefaultGPUMemory(dMem);
+
+	CFloat32Data3DGPU *dGPU = dynamic_cast<CFloat32Data3DGPU*>(d);
+	assert(dGPU); // d must be host- or GPU-backed; any other type would be dereferenced as NULL below
+	return new CFloat32ExistingGPUMemory(dGPU);
+}
+
+
+
+
+
 bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split)
 {
 	int maxBlockDim = astraCUDA3d::maxBlockDimension();
@@ -280,7 +403,7 @@ CCompositeGeometryManager::CVolumePart::~CVolumePart()
 	delete pGeom;
 }
 
-void CCompositeGeometryManager::CVolumePart::getDims(size_t &x, size_t &y, size_t &z)
+void CCompositeGeometryManager::CVolumePart::getDims(size_t &x, size_t &y, size_t &z) const
 {
 	if (!pGeom) {
 		x = y = z = 0;
@@ -292,13 +415,28 @@ void CCompositeGeometryManager::CVolumePart::getDims(size_t &x, size_t &y, size_
 	z = pGeom->getGridSliceCount();
 }
 
-size_t CCompositeGeometryManager::CPart::getSize()
+size_t CCompositeGeometryManager::CPart::getSize() const
 {
 	size_t x, y, z;
 	getDims(x, y, z);
 	return x * y * z;
 }
 
+bool CCompositeGeometryManager::CPart::isFull() const
+{
+	// True when this part spans the complete extent of pData in all three dimensions.
+	size_t nx, ny, nz;
+	getDims(nx, ny, nz);
+	if (nx != pData->getWidth() || ny != pData->getHeight()) return false;
+	return nz == pData->getDepth();
+}
+
+bool CCompositeGeometryManager::CPart::canSplitAndReduce() const // true only when pData is a CFloat32Data3DMemory
+{
+	return dynamic_cast<CFloat32Data3DMemory *>(pData) != 0;
+}
+
+
 
 static bool testVolumeRange(const std::pair<double, double>& fullRange,
                             const CVolumeGeometry3D *pVolGeom,
@@ -334,6 +472,9 @@ static bool testVolumeRange(const std::pair<double, double>& fullRange,
 
 CCompositeGeometryManager::CPart* CCompositeGeometryManager::CVolumePart::reduce(const CPart *_other)
 {
+	if (!canSplitAndReduce())
+		return clone();
+
 	const CProjectionPart *other = dynamic_cast<const CProjectionPart *>(_other);
 	assert(other);
 
@@ -654,7 +795,7 @@ static CProjectionGeometry3D* getSubProjectionGeometryV(const CProjectionGeometr
 // - maybe all approximately the same size?
 void CCompositeGeometryManager::CVolumePart::splitX(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div)
 {
-	if (true) {
+	if (canSplitAndReduce()) {
 		// Split in vertical direction only at first, until we figure out
 		// a model for splitting in other directions
 
@@ -698,12 +839,14 @@ void CCompositeGeometryManager::CVolumePart::splitX(CCompositeGeometryManager::T
 
 			out.push_back(boost::shared_ptr<CPart>(sub));
 		}
+	} else {
+		out.push_back(boost::shared_ptr<CPart>(clone()));
 	}
 }
 
 void CCompositeGeometryManager::CVolumePart::splitY(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div)
 {
-	if (true) {
+	if (canSplitAndReduce()) {
 		// Split in vertical direction only at first, until we figure out
 		// a model for splitting in other directions
 
@@ -747,12 +890,14 @@ void CCompositeGeometryManager::CVolumePart::splitY(CCompositeGeometryManager::T
 
 			out.push_back(boost::shared_ptr<CPart>(sub));
 		}
+	} else {
+		out.push_back(boost::shared_ptr<CPart>(clone()));
 	}
 }
 
 void CCompositeGeometryManager::CVolumePart::splitZ(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div)
 {
-	if (true) {
+	if (canSplitAndReduce()) {
 		// Split in vertical direction only at first, until we figure out
 		// a model for splitting in other directions
 
@@ -796,6 +941,8 @@ void CCompositeGeometryManager::CVolumePart::splitZ(CCompositeGeometryManager::T
 
 			out.push_back(boost::shared_ptr<CPart>(sub));
 		}
+	} else {
+		out.push_back(boost::shared_ptr<CPart>(clone()));
 	}
 }
 
@@ -815,7 +962,7 @@ CCompositeGeometryManager::CProjectionPart::~CProjectionPart()
 	delete pGeom;
 }
 
-void CCompositeGeometryManager::CProjectionPart::getDims(size_t &x, size_t &y, size_t &z)
+void CCompositeGeometryManager::CProjectionPart::getDims(size_t &x, size_t &y, size_t &z) const
 {
 	if (!pGeom) {
 		x = y = z = 0;
@@ -831,6 +978,9 @@ void CCompositeGeometryManager::CProjectionPart::getDims(size_t &x, size_t &y, s
 
 CCompositeGeometryManager::CPart* CCompositeGeometryManager::CProjectionPart::reduce(const CPart *_other)
 {
+	if (!canSplitAndReduce())
+		return clone();
+
 	const CVolumePart *other = dynamic_cast<const CVolumePart *>(_other);
 	assert(other);
 
@@ -868,7 +1018,7 @@ CCompositeGeometryManager::CPart* CCompositeGeometryManager::CProjectionPart::re
 
 void CCompositeGeometryManager::CProjectionPart::splitX(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div)
 {
-	if (true) {
+	if (canSplitAndReduce()) {
 		// Split in vertical direction only at first, until we figure out
 		// a model for splitting in other directions
 
@@ -903,6 +1053,8 @@ void CCompositeGeometryManager::CProjectionPart::splitX(CCompositeGeometryManage
 
 			out.push_back(boost::shared_ptr<CPart>(sub));
 		}
+	} else {
+		out.push_back(boost::shared_ptr<CPart>(clone()));
 	}
 }
 
@@ -914,7 +1066,7 @@ void CCompositeGeometryManager::CProjectionPart::splitY(CCompositeGeometryManage
 
 void CCompositeGeometryManager::CProjectionPart::splitZ(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div)
 {
-	if (true) {
+	if (canSplitAndReduce()) {
 		// Split in vertical direction only at first, until we figure out
 		// a model for splitting in other directions
 
@@ -949,6 +1101,8 @@ void CCompositeGeometryManager::CProjectionPart::splitZ(CCompositeGeometryManage
 
 			out.push_back(boost::shared_ptr<CPart>(sub));
 		}
+	} else {
+		out.push_back(boost::shared_ptr<CPart>(clone()));
 	}
 
 }
@@ -959,8 +1113,8 @@ CCompositeGeometryManager::CProjectionPart* CCompositeGeometryManager::CProjecti
 }
 
 CCompositeGeometryManager::SJob CCompositeGeometryManager::createJobFP(CProjector3D *pProjector,
-                                            CFloat32VolumeData3DMemory *pVolData,
-                                            CFloat32ProjectionData3DMemory *pProjData)
+                                            CFloat32VolumeData3D *pVolData,
+                                            CFloat32ProjectionData3D *pProjData)
 {
 	ASTRA_DEBUG("CCompositeGeometryManager::createJobFP");
 	// Create single job for FP
@@ -992,8 +1146,8 @@ CCompositeGeometryManager::SJob CCompositeGeometryManager::createJobFP(CProjecto
 }
 
 CCompositeGeometryManager::SJob CCompositeGeometryManager::createJobBP(CProjector3D *pProjector,
-                                            CFloat32VolumeData3DMemory *pVolData,
-                                            CFloat32ProjectionData3DMemory *pProjData)
+                                            CFloat32VolumeData3D *pVolData,
+                                            CFloat32ProjectionData3D *pProjData)
 {
 	ASTRA_DEBUG("CCompositeGeometryManager::createJobBP");
 	// Create single job for BP
@@ -1022,8 +1176,8 @@ CCompositeGeometryManager::SJob CCompositeGeometryManager::createJobBP(CProjecto
 	return BP;
 }
 
-bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
-                                     CFloat32ProjectionData3DMemory *pProjData)
+bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, CFloat32VolumeData3D *pVolData,
+                                     CFloat32ProjectionData3D *pProjData)
 {
 	TJobList L;
 	L.push_back(createJobFP(pProjector, pVolData, pProjData));
@@ -1031,8 +1185,8 @@ bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, CFloat32VolumeDat
 	return doJobs(L);
 }
 
-bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
-                                     CFloat32ProjectionData3DMemory *pProjData)
+bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeData3D *pVolData,
+                                     CFloat32ProjectionData3D *pProjData)
 {
 	TJobList L;
 	L.push_back(createJobBP(pProjector, pVolData, pProjData));
@@ -1041,8 +1195,8 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeDat
 }
 
 
-bool CCompositeGeometryManager::doFDK(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
-                                     CFloat32ProjectionData3DMemory *pProjData, bool bShortScan,
+bool CCompositeGeometryManager::doFDK(CProjector3D *pProjector, CFloat32VolumeData3D *pVolData,
+                                     CFloat32ProjectionData3D *pProjData, bool bShortScan,
                                      const float *pfFilter)
 {
 	if (!dynamic_cast<CConeProjectionGeometry3D*>(pProjData->getGeometry())) {
@@ -1061,11 +1215,11 @@ bool CCompositeGeometryManager::doFDK(CProjector3D *pProjector, CFloat32VolumeDa
 	return doJobs(L);
 }
 
-bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData)
+bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3D *>& volData, const std::vector<CFloat32ProjectionData3D *>& projData)
 {
 	ASTRA_DEBUG("CCompositeGeometryManager::doFP, multi-volume");
 
-	std::vector<CFloat32VolumeData3DMemory *>::const_iterator i;
+	std::vector<CFloat32VolumeData3D *>::const_iterator i;
 	std::vector<boost::shared_ptr<CPart> > inputs;
 
 	for (i = volData.begin(); i != volData.end(); ++i) {
@@ -1079,7 +1233,7 @@ bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, const std::vector
 		inputs.push_back(boost::shared_ptr<CPart>(input));
 	}
 
-	std::vector<CFloat32ProjectionData3DMemory *>::const_iterator j;
+	std::vector<CFloat32ProjectionData3D *>::const_iterator j;
 	std::vector<boost::shared_ptr<CPart> > outputs;
 
 	for (j = projData.begin(); j != projData.end(); ++j) {
@@ -1115,12 +1269,12 @@ bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, const std::vector
 	return doJobs(L);
 }
 
-bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData)
+bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3D *>& volData, const std::vector<CFloat32ProjectionData3D *>& projData)
 {
 	ASTRA_DEBUG("CCompositeGeometryManager::doBP, multi-volume");
 
 
-	std::vector<CFloat32VolumeData3DMemory *>::const_iterator i;
+	std::vector<CFloat32VolumeData3D *>::const_iterator i;
 	std::vector<boost::shared_ptr<CPart> > outputs;
 
 	for (i = volData.begin(); i != volData.end(); ++i) {
@@ -1134,7 +1288,7 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, const std::vector
 		outputs.push_back(boost::shared_ptr<CPart>(output));
 	}
 
-	std::vector<CFloat32ProjectionData3DMemory *>::const_iterator j;
+	std::vector<CFloat32ProjectionData3D *>::const_iterator j;
 	std::vector<boost::shared_ptr<CPart> > inputs;
 
 	for (j = projData.begin(); j != projData.end(); ++j) {
@@ -1188,14 +1342,25 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter
 	if (L.begin()->eType == CCompositeGeometryManager::SJob::JOB_NOP) {
 		// just zero output?
 		if (zero) {
-			for (size_t z = 0; z < outz; ++z) {
-				for (size_t y = 0; y < outy; ++y) {
-					float* ptr = output->pData->getData();
-					ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth();
-					ptr += (y + output->subY) * (size_t)output->pData->getWidth();
-					ptr += output->subX;
-					memset(ptr, 0, sizeof(float) * outx);
+			// TODO: This function shouldn't have to know about this difference
+			// between Memory/GPU
+			CFloat32Data3DMemory *hostMem = dynamic_cast<CFloat32Data3DMemory *>(output->pData);
+			if (hostMem) {
+				for (size_t z = 0; z < outz; ++z) {
+					for (size_t y = 0; y < outy; ++y) {
+						float* ptr = hostMem->getData();
+						ptr += (z + output->subZ) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth(); // slice offset: subZ, not subX
+						ptr += (y + output->subY) * (size_t)output->pData->getWidth();
+						ptr += output->subX;
+						memset(ptr, 0, sizeof(float) * outx); // zero one row of the output part
+					}
 				}
+			} else {
+				CFloat32Data3DGPU *gpuMem = dynamic_cast<CFloat32Data3DGPU *>(output->pData);
+				assert(gpuMem);
+				assert(output->isFull()); // TODO: zero subset?
+
+				if (!astraCUDA3d::zeroGPUMemory(gpuMem->getHandle(), outx, outy, outz)) ASTRA_ERROR("Error zeroing GPU memory");
 			}
 		}
 		return true;
@@ -1214,10 +1379,11 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter
 	dstdims.subx = output->subX;
 	dstdims.suby = output->subY;
 	dstdims.subz = output->subZ;
-	float *dst = output->pData->getData();
 
-	astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO);
-	bool ok = outputMem;
+	CFloat32CustomGPUMemory *dstMem = createGPUMemoryHandler(output->pData);
+
+	bool ok = dstMem->allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO);
+	if (!ok) ASTRA_ERROR("Error allocating GPU memory");
 
 	for (CCompositeGeometryManager::TJobList::const_iterator i = L.begin(); i != L.end(); ++i) {
 		const CCompositeGeometryManager::SJob &j = *i;
@@ -1238,7 +1404,8 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter
 
 		size_t inx, iny, inz;
 		j.pInput->getDims(inx, iny, inz);
-		astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO);
+
+		CFloat32CustomGPUMemory *srcMem = createGPUMemoryHandler(j.pInput->pData);
 
 		astraCUDA3d::SSubDimensions3D srcdims;
 		srcdims.nx = j.pInput->pData->getWidth();
@@ -1251,9 +1418,11 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter
 		srcdims.subx = j.pInput->subX;
 		srcdims.suby = j.pInput->subY;
 		srcdims.subz = j.pInput->subZ;
-		const float *src = j.pInput->pData->getDataConst();
 
-		ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims);
+		ok = srcMem->allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO);
+		if (!ok) ASTRA_ERROR("Error allocating GPU memory");
+
+		ok = srcMem->copyToGPUMemory(srcdims);
 		if (!ok) ASTRA_ERROR("Error copying input data to GPU");
 
 		switch (j.eType) {
@@ -1264,7 +1433,7 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter
 
 			ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP");
 
-			ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel);
+			ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, dstMem->hnd, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, srcMem->hnd, detectorSuperSampling, projKernel);
 			if (!ok) ASTRA_ERROR("Error performing sub-FP");
 			ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done");
 		}
@@ -1276,7 +1445,7 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter
 
 			ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP");
 
-			ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling, densityWeighting);
+			ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, srcMem->hnd, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, dstMem->hnd, voxelSuperSampling, densityWeighting);
 			if (!ok) ASTRA_ERROR("Error performing sub-BP");
 			ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done");
 		}
@@ -1292,7 +1461,7 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter
 			} else {
 				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FDK");
 
-				ok = astraCUDA3d::FDK(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, j.FDKSettings.bShortScan, j.FDKSettings.pfFilter);
+				ok = astraCUDA3d::FDK(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, srcMem->hnd, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, dstMem->hnd, j.FDKSettings.bShortScan, j.FDKSettings.pfFilter);
 				if (!ok) ASTRA_ERROR("Error performing sub-FDK");
 				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FDK done");
 			}
@@ -1302,17 +1471,20 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter
 			assert(false);
 		}
 
-		ok = astraCUDA3d::freeGPUMemory(inputMem);
+		ok = srcMem->freeGPUMemory();
 		if (!ok) ASTRA_ERROR("Error freeing GPU memory");
 
+		delete srcMem;
 	}
 
-	ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims);
+	ok = dstMem->copyFromGPUMemory(dstdims);
 	if (!ok) ASTRA_ERROR("Error copying output data from GPU");
 	
-	ok = astraCUDA3d::freeGPUMemory(outputMem);
+	ok = dstMem->freeGPUMemory();
 	if (!ok) ASTRA_ERROR("Error freeing GPU memory");
 
+	delete dstMem;
+
 	return true;
 }
 
@@ -1455,6 +1627,8 @@ void CCompositeGeometryManager::setGPUIndices(const std::vector<int>& GPUIndices
 
 bool CCompositeGeometryManager::doJobs(TJobList &jobs)
 {
+	// TODO: Proper clean up if substeps fail (Or as proper as possible)
+
 	ASTRA_DEBUG("CCompositeGeometryManager::doJobs");
 
 	// Sort job list into job set by output part
diff --git a/src/CudaBackProjectionAlgorithm3D.cpp b/src/CudaBackProjectionAlgorithm3D.cpp
index 223a9a4..27bb968 100644
--- a/src/CudaBackProjectionAlgorithm3D.cpp
+++ b/src/CudaBackProjectionAlgorithm3D.cpp
@@ -60,8 +60,8 @@ CCudaBackProjectionAlgorithm3D::CCudaBackProjectionAlgorithm3D()
 //----------------------------------------------------------------------------------------
 // Constructor with initialization
 CCudaBackProjectionAlgorithm3D::CCudaBackProjectionAlgorithm3D(CProjector3D* _pProjector, 
-								   CFloat32ProjectionData3DMemory* _pProjectionData, 
-								   CFloat32VolumeData3DMemory* _pReconstruction)
+								   CFloat32ProjectionData3D* _pProjectionData, 
+								   CFloat32VolumeData3D* _pReconstruction)
 {
 	_clear();
 	initialize(_pProjector, _pProjectionData, _pReconstruction);
@@ -145,8 +145,8 @@ bool CCudaBackProjectionAlgorithm3D::initialize(const Config& _cfg)
 //----------------------------------------------------------------------------------------
 // Initialize - C++
 bool CCudaBackProjectionAlgorithm3D::initialize(CProjector3D* _pProjector, 
-								  CFloat32ProjectionData3DMemory* _pSinogram, 
-								  CFloat32VolumeData3DMemory* _pReconstruction)
+								  CFloat32ProjectionData3D* _pSinogram, 
+								  CFloat32VolumeData3D* _pReconstruction)
 {
 	// if already initialized, clear first
 	if (m_bIsInitialized) {
@@ -187,17 +187,21 @@ void CCudaBackProjectionAlgorithm3D::run(int _iNrIterations)
 	// check initialized
 	ASTRA_ASSERT(m_bIsInitialized);
 
-	CFloat32ProjectionData3DMemory* pSinoMem = dynamic_cast<CFloat32ProjectionData3DMemory*>(m_pSinogram);
+	CFloat32ProjectionData3D* pSinoMem = dynamic_cast<CFloat32ProjectionData3D*>(m_pSinogram);
 	ASTRA_ASSERT(pSinoMem);
-	CFloat32VolumeData3DMemory* pReconMem = dynamic_cast<CFloat32VolumeData3DMemory*>(m_pReconstruction);
+	CFloat32VolumeData3D* pReconMem = dynamic_cast<CFloat32VolumeData3D*>(m_pReconstruction);
 	ASTRA_ASSERT(pReconMem);
 
 	const CProjectionGeometry3D* projgeom = pSinoMem->getGeometry();
 	const CVolumeGeometry3D& volgeom = *pReconMem->getGeometry();
 
 	if (m_bSIRTWeighting) {
-		astraCudaBP_SIRTWeighted(pReconMem->getData(),
-		                         pSinoMem->getDataConst(),
+		CFloat32ProjectionData3DMemory* pSinoMemory = dynamic_cast<CFloat32ProjectionData3DMemory*>(m_pSinogram);
+		ASTRA_ASSERT(pSinoMemory);
+		CFloat32VolumeData3DMemory* pReconMemory = dynamic_cast<CFloat32VolumeData3DMemory*>(m_pReconstruction);
+		ASTRA_ASSERT(pReconMemory);
+		astraCudaBP_SIRTWeighted(pReconMemory->getData(),
+		                         pSinoMemory->getDataConst(),
 		                         &volgeom, projgeom,
 		                         m_iGPUIndex, m_iVoxelSuperSampling);
 	} else {
diff --git a/src/CudaFDKAlgorithm3D.cpp b/src/CudaFDKAlgorithm3D.cpp
index d02db6d..d503351 100644
--- a/src/CudaFDKAlgorithm3D.cpp
+++ b/src/CudaFDKAlgorithm3D.cpp
@@ -59,8 +59,8 @@ CCudaFDKAlgorithm3D::CCudaFDKAlgorithm3D()
 //----------------------------------------------------------------------------------------
 // Constructor with initialization
 CCudaFDKAlgorithm3D::CCudaFDKAlgorithm3D(CProjector3D* _pProjector, 
-								   CFloat32ProjectionData3DMemory* _pProjectionData, 
-								   CFloat32VolumeData3DMemory* _pReconstruction)
+								   CFloat32ProjectionData3D* _pProjectionData, 
+								   CFloat32VolumeData3D* _pReconstruction)
 {
 	_clear();
 	initialize(_pProjector, _pProjectionData, _pReconstruction);
@@ -179,8 +179,8 @@ bool CCudaFDKAlgorithm3D::initialize(const Config& _cfg)
 //----------------------------------------------------------------------------------------
 // Initialize - C++
 bool CCudaFDKAlgorithm3D::initialize(CProjector3D* _pProjector, 
-								  CFloat32ProjectionData3DMemory* _pSinogram, 
-								  CFloat32VolumeData3DMemory* _pReconstruction)
+								  CFloat32ProjectionData3D* _pSinogram, 
+								  CFloat32VolumeData3D* _pReconstruction)
 {
 	// if already initialized, clear first
 	if (m_bIsInitialized) {
@@ -225,9 +225,9 @@ void CCudaFDKAlgorithm3D::run(int _iNrIterations)
 
 	ASTRA_ASSERT(conegeom);
 
-	CFloat32ProjectionData3DMemory* pSinoMem = dynamic_cast<CFloat32ProjectionData3DMemory*>(m_pSinogram);
+	CFloat32ProjectionData3D* pSinoMem = dynamic_cast<CFloat32ProjectionData3D*>(m_pSinogram);
 	ASTRA_ASSERT(pSinoMem);
-	CFloat32VolumeData3DMemory* pReconMem = dynamic_cast<CFloat32VolumeData3DMemory*>(m_pReconstruction);
+	CFloat32VolumeData3D* pReconMem = dynamic_cast<CFloat32VolumeData3D*>(m_pReconstruction);
 	ASTRA_ASSERT(pReconMem);
 
 	const float *filter = NULL;
diff --git a/src/CudaForwardProjectionAlgorithm3D.cpp b/src/CudaForwardProjectionAlgorithm3D.cpp
index 6783093..ce808eb 100644
--- a/src/CudaForwardProjectionAlgorithm3D.cpp
+++ b/src/CudaForwardProjectionAlgorithm3D.cpp
@@ -101,14 +101,14 @@ bool CCudaForwardProjectionAlgorithm3D::initialize(const Config& _cfg)
 	node = _cfg.self.getSingleNode("ProjectionDataId");
 	ASTRA_CONFIG_CHECK(node, "CudaForwardProjection3D", "No ProjectionDataId tag specified.");
 	id = node.getContentInt();
-	m_pProjections = dynamic_cast<CFloat32ProjectionData3DMemory*>(CData3DManager::getSingleton().get(id));
+	m_pProjections = dynamic_cast<CFloat32ProjectionData3D*>(CData3DManager::getSingleton().get(id));
 	CC.markNodeParsed("ProjectionDataId");
 
 	// reconstruction data
 	node = _cfg.self.getSingleNode("VolumeDataId");
 	ASTRA_CONFIG_CHECK(node, "CudaForwardProjection3D", "No VolumeDataId tag specified.");
 	id = node.getContentInt();
-	m_pVolume = dynamic_cast<CFloat32VolumeData3DMemory*>(CData3DManager::getSingleton().get(id));
+	m_pVolume = dynamic_cast<CFloat32VolumeData3D*>(CData3DManager::getSingleton().get(id));
 	CC.markNodeParsed("VolumeDataId");
 
 	// optional: projector
@@ -140,8 +140,8 @@ bool CCudaForwardProjectionAlgorithm3D::initialize(const Config& _cfg)
 
 
 bool CCudaForwardProjectionAlgorithm3D::initialize(CProjector3D* _pProjector, 
-                                  CFloat32ProjectionData3DMemory* _pProjections, 
-                                  CFloat32VolumeData3DMemory* _pVolume,
+                                  CFloat32ProjectionData3D* _pProjections, 
+                                  CFloat32VolumeData3D* _pVolume,
                                   int _iGPUindex, int _iDetectorSuperSampling)
 {
 	m_pProjector = _pProjector;
diff --git a/src/Float32Data3D.cpp b/src/Float32Data3D.cpp
index cad1f18..cc824bd 100644
--- a/src/Float32Data3D.cpp
+++ b/src/Float32Data3D.cpp
@@ -28,6 +28,10 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
 #include "astra/Float32Data3D.h"
 #include <sstream>
 
+#ifdef ASTRA_CUDA
+#include "../../cuda/3d/mem3d.h"
+#endif
+
 using namespace std;
 
 namespace astra {
@@ -60,7 +64,5 @@ std::string CFloat32Data3D::description() const
 	if (getType() == CFloat32Data3D::VOLUME) res << " volume data \t";
 	return res.str();
 }
-//----------------------------------------------------------------------------------------
-
 
 } // end namespace astra
diff --git a/src/Float32Data3DGPU.cpp b/src/Float32Data3DGPU.cpp
new file mode 100644
index 0000000..cd9c4ad
--- /dev/null
+++ b/src/Float32Data3DGPU.cpp
@@ -0,0 +1,98 @@
+/*
+-----------------------------------------------------------------------
+Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp
+           2014-2015, CWI, Amsterdam
+
+Contact: astra@uantwerpen.be
+Website: http://sf.net/projects/astra-toolbox
+
+This file is part of the ASTRA Toolbox.
+
+
+The ASTRA Toolbox is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The ASTRA Toolbox is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+
+-----------------------------------------------------------------------
+$Id$
+*/
+
+#include "astra/Float32Data3DGPU.h"
+
+namespace astra {
+
+//----------------------------------------------------------------------------------------
+// Default constructor.
+CFloat32Data3DGPU::CFloat32Data3DGPU()
+{
+	_clear(); // zero the dimensions and reset the handle
+	m_bInitialized = false; // the object stays unusable until it is initialized
+}
+
+//----------------------------------------------------------------------------------------
+// Destructor.
+CFloat32Data3DGPU::~CFloat32Data3DGPU() 
+{
+	if (m_bInitialized) // _unInit() asserts m_bInitialized, so only call it in that state
+	{
+		_unInit();
+	}
+}
+
+//----------------------------------------------------------------------------------------
+// Initializes an instance of the CFloat32Data3DGPU class with pre-allocated memory
+bool CFloat32Data3DGPU::_initialize(int _iWidth, int _iHeight, int _iDepth, astraCUDA3d::MemHandle3D _hnd)
+{
+	// basic checks
+	ASTRA_ASSERT(_iWidth > 0);
+	ASTRA_ASSERT(_iHeight > 0);
+	ASTRA_ASSERT(_iDepth > 0);
+	// NOTE(review): _hnd itself is not validated here.
+
+	if (m_bInitialized) { // re-initialization: drop the previous state first
+		_unInit();
+	}
+
+	// store the dimensions and the total element count
+	m_iWidth = _iWidth;
+	m_iHeight = _iHeight;
+	m_iDepth = _iDepth;
+	m_iSize = (size_t)m_iWidth * m_iHeight * m_iDepth;
+
+	m_hnd = _hnd;
+
+	// initialization complete (m_bInitialized is not set here; presumably left to the caller, TODO confirm)
+	return true;
+}
+//----------------------------------------------------------------------------------------
+// Clear all member variables, setting all numeric variables to 0 and all pointers to NULL. 
+void CFloat32Data3DGPU::_clear() // does not touch m_bInitialized
+{
+	m_iWidth = 0;
+	m_iHeight = 0;
+	m_iDepth = 0;
+	m_iSize = 0;
+
+	m_hnd.d.reset(); // reset the handle's internal pointer member
+}
+
+//----------------------------------------------------------------------------------------
+// Un-initialize the object, bringing it back to the uninitialized state.
+void CFloat32Data3DGPU::_unInit()
+{
+	ASTRA_ASSERT(m_bInitialized); // calling this on an uninitialized object is a programming error
+
+	_clear();
+	m_bInitialized = false;
+}
+
+} // end namespace astra
diff --git a/src/Float32Data3DMemory.cpp b/src/Float32Data3DMemory.cpp
index 7e60527..5c5c310 100644
--- a/src/Float32Data3DMemory.cpp
+++ b/src/Float32Data3DMemory.cpp
@@ -163,7 +163,7 @@ bool CFloat32Data3DMemory::_initialize(int _iWidth, int _iHeight, int _iDepth, C
 	ASTRA_ASSERT(_iWidth > 0);
 	ASTRA_ASSERT(_iHeight > 0);
 	ASTRA_ASSERT(_iDepth > 0);
-	ASTRA_ASSERT(_pCustomMemory != NULL);
+	//ASTRA_ASSERT(_pCustomMemory != NULL);
 
 	if (m_bInitialized) {
 		_unInit();
diff --git a/src/Float32ProjectionData3DGPU.cpp b/src/Float32ProjectionData3DGPU.cpp
new file mode 100644
index 0000000..0e063d6
--- /dev/null
+++ b/src/Float32ProjectionData3DGPU.cpp
@@ -0,0 +1,71 @@
+/*
+-----------------------------------------------------------------------
+Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp
+           2014-2015, CWI, Amsterdam
+
+Contact: astra@uantwerpen.be
+Website: http://sf.net/projects/astra-toolbox
+
+This file is part of the ASTRA Toolbox.
+
+
+The ASTRA Toolbox is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The ASTRA Toolbox is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+
+-----------------------------------------------------------------------
+$Id$
+*/
+
+#include "astra/Float32ProjectionData3DGPU.h"
+
+using namespace std;
+
+namespace astra
+{
+
+//----------------------------------------------------------------------------------------
+// Default constructor: no geometry is attached (m_pGeometry is NULL) and the object is left uninitialized
+CFloat32ProjectionData3DGPU::CFloat32ProjectionData3DGPU() :
+	CFloat32Data3DGPU() 
+{
+	m_pGeometry = NULL;
+	m_bInitialized = false;
+}
+
+//----------------------------------------------------------------------------------------
+// Create an instance of the CFloat32ProjectionData3DGPU class with pre-allocated data (see initialize()).
+CFloat32ProjectionData3DGPU::CFloat32ProjectionData3DGPU(CProjectionGeometry3D* _pGeometry, astraCUDA3d::MemHandle3D _hnd)
+{
+	m_pGeometry = NULL; // initialize() deletes the previous geometry, so the pointer must be well-defined first
+	m_bInitialized = false;
+	m_bInitialized = initialize(_pGeometry, _hnd);
+}
+//----------------------------------------------------------------------------------------
+// Destructor: frees the geometry clone created by initialize().
+CFloat32ProjectionData3DGPU::~CFloat32ProjectionData3DGPU()
+{
+	delete m_pGeometry;
+	m_pGeometry = 0;
+}
+//----------------------------------------------------------------------------------------
+// Initialization: stores a private clone of _pGeometry (must be non-null) and the handle _hnd; returns the result of _initialize().
+bool CFloat32ProjectionData3DGPU::initialize(CProjectionGeometry3D* _pGeometry, astraCUDA3d::MemHandle3D _hnd)
+{
+	CProjectionGeometry3D* pClone = _pGeometry->clone(); // clone first: _pGeometry may be this object's own geometry
+	delete m_pGeometry; // free the clone of an earlier initialize() call instead of leaking it (no-op for NULL)
+	m_pGeometry = pClone;
+	m_bInitialized = _initialize(m_pGeometry->getDetectorColCount(), m_pGeometry->getProjectionCount(), m_pGeometry->getDetectorRowCount(), _hnd);
+	return m_bInitialized;
+}
+
+} // end namespace astra
diff --git a/src/Float32ProjectionData3DMemory.cpp b/src/Float32ProjectionData3DMemory.cpp
index 81daf9e..69033d5 100644
--- a/src/Float32ProjectionData3DMemory.cpp
+++ b/src/Float32ProjectionData3DMemory.cpp
@@ -114,7 +114,7 @@ bool CFloat32ProjectionData3DMemory::initialize(CProjectionGeometry3D* _pGeometr
 
 //----------------------------------------------------------------------------------------
 // Initialization
-bool CFloat32ProjectionData3DMemory::initialize(CProjectionGeometry3D* _pGeometry, CFloat32CustomMemory* _pCustomMemory) 
+bool CFloat32ProjectionData3DMemory::initialize(CProjectionGeometry3D* _pGeometry, CFloat32CustomMemory* _pCustomMemory)
 {
 	m_pGeometry = _pGeometry->clone();
 	m_bInitialized = _initialize(m_pGeometry->getDetectorColCount(), m_pGeometry->getProjectionCount(), m_pGeometry->getDetectorRowCount(), _pCustomMemory);
diff --git a/src/Float32VolumeData3DGPU.cpp b/src/Float32VolumeData3DGPU.cpp
new file mode 100644
index 0000000..82a222f
--- /dev/null
+++ b/src/Float32VolumeData3DGPU.cpp
@@ -0,0 +1,71 @@
+/*
+-----------------------------------------------------------------------
+Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp
+           2014-2015, CWI, Amsterdam
+
+Contact: astra@uantwerpen.be
+Website: http://sf.net/projects/astra-toolbox
+
+This file is part of the ASTRA Toolbox.
+
+
+The ASTRA Toolbox is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The ASTRA Toolbox is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+
+-----------------------------------------------------------------------
+$Id$
+*/
+
+#include "astra/Float32VolumeData3DGPU.h"
+
+using namespace std;
+
+namespace astra
+{
+
+//----------------------------------------------------------------------------------------
+// Default constructor: no geometry is attached (m_pGeometry is NULL) and the object is left uninitialized
+CFloat32VolumeData3DGPU::CFloat32VolumeData3DGPU() :
+	CFloat32Data3DGPU() 
+{
+	m_pGeometry = NULL;
+	m_bInitialized = false;
+}
+
+//----------------------------------------------------------------------------------------
+// Create an instance of the CFloat32VolumeData3DGPU class with pre-allocated data (see initialize()).
+CFloat32VolumeData3DGPU::CFloat32VolumeData3DGPU(CVolumeGeometry3D* _pGeometry, astraCUDA3d::MemHandle3D _hnd)
+{
+	m_pGeometry = NULL; // initialize() deletes the previous geometry, so the pointer must be well-defined first
+	m_bInitialized = false;
+	m_bInitialized = initialize(_pGeometry, _hnd);
+}
+//----------------------------------------------------------------------------------------
+// Destructor: frees the geometry clone created by initialize().
+CFloat32VolumeData3DGPU::~CFloat32VolumeData3DGPU()
+{
+	delete m_pGeometry;
+	m_pGeometry = 0;
+}
+//----------------------------------------------------------------------------------------
+// Initialization: stores a private clone of _pGeometry (must be non-null) and the handle _hnd; returns the result of _initialize().
+bool CFloat32VolumeData3DGPU::initialize(CVolumeGeometry3D* _pGeometry, astraCUDA3d::MemHandle3D _hnd)
+{
+	CVolumeGeometry3D* pClone = _pGeometry->clone(); // clone first: _pGeometry may be this object's own geometry
+	delete m_pGeometry; // free the clone of an earlier initialize() call instead of leaking it (no-op for NULL)
+	m_pGeometry = pClone;
+	m_bInitialized = _initialize(m_pGeometry->getGridColCount(), m_pGeometry->getGridRowCount(), m_pGeometry->getGridSliceCount(), _hnd);
+	return m_bInitialized;
+}
+
+} // end namespace astra
diff --git a/src/Float32VolumeData3DMemory.cpp b/src/Float32VolumeData3DMemory.cpp
index 9f81c85..27ae87b 100644
--- a/src/Float32VolumeData3DMemory.cpp
+++ b/src/Float32VolumeData3DMemory.cpp
@@ -114,7 +114,7 @@ bool CFloat32VolumeData3DMemory::initialize(CVolumeGeometry3D* _pGeometry, float
 }
 //----------------------------------------------------------------------------------------
 // Initialization
-bool CFloat32VolumeData3DMemory::initialize(CVolumeGeometry3D* _pGeometry, CFloat32CustomMemory* _pCustomMemory) 
+bool CFloat32VolumeData3DMemory::initialize(CVolumeGeometry3D* _pGeometry, CFloat32CustomMemory* _pCustomMemory)
 {
 	m_pGeometry = _pGeometry->clone();
 	m_bInitialized = _initialize(m_pGeometry->getGridColCount(), m_pGeometry->getGridRowCount(), m_pGeometry->getGridSliceCount(), _pCustomMemory);
-- 
cgit v1.2.3