From 9c4222b4cab810815a0adee0302501471da177aa Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Tue, 23 Oct 2018 10:19:32 +0200
Subject: Add minimal GPU Array interface

This extension (only) allows creating a CUDA 3D array, copying projection
data into it, performing a BP from the array, and freeing the array.
---
 cuda/3d/astra3d.cu               | 45 +++++++++++++++++++
 cuda/3d/mem3d.cu                 | 95 ++++++++++++++++++++++++++++++++++++----
 cuda/3d/util3d.cu                | 35 +++++++++++++++
 include/astra/Float32Data3DGPU.h |  3 --
 include/astra/cuda/3d/astra3d.h  |  7 +++
 include/astra/cuda/3d/mem3d.h    | 14 ++++++
 include/astra/cuda/3d/util3d.h   |  1 +
 7 files changed, 189 insertions(+), 11 deletions(-)

diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index 23af36a..be258be 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -35,12 +35,15 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
 #include "astra/cuda/3d/fdk.h"
 #include "astra/cuda/3d/arith3d.h"
 #include "astra/cuda/3d/astra3d.h"
+#include "astra/cuda/3d/mem3d.h"
 
 #include "astra/ParallelProjectionGeometry3D.h"
 #include "astra/ParallelVecProjectionGeometry3D.h"
 #include "astra/ConeProjectionGeometry3D.h"
 #include "astra/ConeVecProjectionGeometry3D.h"
 #include "astra/VolumeGeometry3D.h"
+#include "astra/Float32ProjectionData3DGPU.h"
+#include "astra/Logging.h"
 
 #include <iostream>
 #include <cstdio>
@@ -1317,4 +1320,46 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
 
 }
 
+_AstraExport void uploadMultipleProjections(CFloat32ProjectionData3DGPU *proj,
+                                         const float *data,
+                                         unsigned int y_min, unsigned int y_max)
+{
+	astraCUDA3d::MemHandle3D hnd = proj->getHandle();
+
+	astraCUDA3d::SDimensions3D dims1;
+	dims1.iProjU = proj->getDetectorColCount();
+	dims1.iProjV = proj->getDetectorRowCount();
+	dims1.iProjAngles = y_max - y_min + 1;
+
+	cudaPitchedPtr D_proj = allocateProjectionData(dims1);
+	bool ok = copyProjectionsToDevice(data, D_proj, dims1);
+	if (!ok)
+		ASTRA_ERROR("Failed to upload projection to GPU");
+
+	astraCUDA3d::MemHandle3D hnd1 = astraCUDA3d::wrapHandle(
+			(float *)D_proj.ptr,
+			dims1.iProjU, dims1.iProjAngles, dims1.iProjV,
+			D_proj.pitch / sizeof(float));
+
+	astraCUDA3d::SSubDimensions3D subdims;
+	subdims.nx = dims1.iProjU;
+	subdims.ny = proj->getAngleCount();
+	subdims.nz = dims1.iProjV;
+	subdims.pitch = D_proj.pitch / sizeof(float); // FIXME: Pitch for wrong obj!
+	subdims.subnx = dims1.iProjU;
+	subdims.subny = dims1.iProjAngles;
+	subdims.subnz = dims1.iProjV;
+	subdims.subx = 0;
+	subdims.suby = y_min;
+	subdims.subz = 0;
+
+	ok = astraCUDA3d::copyIntoArray(hnd, hnd1, subdims);
+	if (!ok)
+		ASTRA_ERROR("Failed to copy projection into 3d data");
+
+	cudaFree(D_proj.ptr);
+
+}
+
+
 }
diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu
index fe3e723..697d2d2 100644
--- a/cuda/3d/mem3d.cu
+++ b/cuda/3d/mem3d.cu
@@ -49,6 +49,7 @@ namespace astraCUDA3d {
 struct SMemHandle3D_internal
 {
 	cudaPitchedPtr ptr;
+	cudaArray *arr;
 	unsigned int nx;
 	unsigned int ny;
 	unsigned int nz;
@@ -79,6 +80,7 @@ MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Me
 	hnd.nx = x;
 	hnd.ny = y;
 	hnd.nz = z;
+	hnd.arr = 0;
 
 	size_t free = astraCUDA::availableGPUMemory();
 
@@ -113,6 +115,7 @@ MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Me
 bool zeroGPUMemory(MemHandle3D handle, unsigned int x, unsigned int y, unsigned int z)
 {
 	SMemHandle3D_internal& hnd = *handle.d.get();
+	assert(!hnd.arr);
 	cudaError_t err = cudaMemset3D(hnd.ptr, 0, make_cudaExtent(sizeof(float)*x, y, z));
 	return err == cudaSuccess;
 }
@@ -120,7 +123,11 @@ bool zeroGPUMemory(MemHandle3D handle, unsigned int x, unsigned int y, unsigned
 bool freeGPUMemory(MemHandle3D handle)
 {
 	size_t free = astraCUDA::availableGPUMemory();
-	cudaError_t err = cudaFree(handle.d->ptr.ptr);
+	cudaError_t err;
+	if (handle.d->arr)
+		err = cudaFreeArray(handle.d->arr);
+	else
+		err = cudaFree(handle.d->ptr.ptr);
 	size_t free2 = astraCUDA::availableGPUMemory();
 
 	ASTRA_DEBUG("Freeing memory. (Pre: %lu, post: %lu)", free, free2);
@@ -132,6 +139,7 @@ bool copyToGPUMemory(const float *src, MemHandle3D dst, const SSubDimensions3D &
 {
 	ASTRA_DEBUG("Copying %d x %d x %d to GPU", pos.subnx, pos.subny, pos.subnz);
 	ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz);
+	assert(!dst.d->arr);
 	cudaPitchedPtr s;
 	s.ptr = (void*)src; // const cast away
 	s.pitch = pos.pitch * sizeof(float);
@@ -162,6 +170,7 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos)
 {
 	ASTRA_DEBUG("Copying %d x %d x %d from GPU", pos.subnx, pos.subny, pos.subnz);
 	ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz);
+	assert(!src.d->arr);
 	cudaPitchedPtr d;
 	d.ptr = (void*)dst;
 	d.pitch = pos.pitch * sizeof(float);
@@ -170,15 +179,21 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos)
 	ASTRA_DEBUG("Pitch %d, xsize %d, ysize %d", d.pitch, d.xsize, d.ysize);
 
 	cudaMemcpy3DParms p;
-	p.srcArray = 0;
 	p.srcPos = make_cudaPos(0, 0, 0);
-	p.srcPtr = src.d->ptr;
 
 	p.dstArray = 0;
 	p.dstPos = make_cudaPos(pos.subx * sizeof(float), pos.suby, pos.subz);
 	p.dstPtr = d;
 
-	p.extent = make_cudaExtent(pos.subnx * sizeof(float), pos.subny, pos.subnz);
+        if (src.d->ptr.ptr) {
+            p.srcArray = 0;
+            p.srcPtr = src.d->ptr;
+            p.extent = make_cudaExtent(pos.subnx * sizeof(float), pos.subny, pos.subnz);
+        } else {
+            p.srcArray = src.d->arr;
+            p.srcPtr.ptr = 0;
+            p.extent = make_cudaExtent(pos.subnx, pos.subny, pos.subnz);
+        }
 
 	p.kind = cudaMemcpyDeviceToHost;
 
@@ -191,6 +206,8 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos)
 
 bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel)
 {
+	assert(!projData.d->arr);
+	assert(!volData.d->arr);
 	SDimensions3D dims;
 	SProjectorParams3D params;
 
@@ -253,6 +270,7 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
 
 bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling, bool bFDKWeighting)
 {
+	assert(!volData.d->arr);
 	SDimensions3D dims;
 	SProjectorParams3D params;
 
@@ -273,10 +291,17 @@ bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
 
 	params.bFDKWeighting = bFDKWeighting;
 
-	if (pParProjs)
-		ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, params);
-	else
-		ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, params);
+	if (pParProjs) {
+		if (projData.d->arr)
+			ok &= Par3DBP_Array(volData.d->ptr, projData.d->arr, dims, pParProjs, params);
+		else
+			ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, params);
+	} else {
+		if (projData.d->arr)
+			ok &= ConeBP_Array(volData.d->ptr, projData.d->arr, dims, pConeProjs, params);
+		else
+			ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, params);
+	}
 
 	delete[] pParProjs;
 	delete[] pConeProjs;
@@ -287,6 +312,8 @@ bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
 
 bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan, const float *pfFilter)
 {
+	assert(!projData.d->arr);
+	assert(!volData.d->arr);
 	SDimensions3D dims;
 	SProjectorParams3D params;
 
@@ -328,6 +355,7 @@ _AstraExport MemHandle3D wrapHandle(float *D_ptr, unsigned int x, unsigned int y
 
 	SMemHandle3D_internal h;
 	h.ptr = ptr;
+	h.arr = 0;
 
 	MemHandle3D hnd;
 	hnd.d = boost::shared_ptr<SMemHandle3D_internal>(new SMemHandle3D_internal);
@@ -336,6 +364,57 @@ _AstraExport MemHandle3D wrapHandle(float *D_ptr, unsigned int x, unsigned int y
 	return hnd;
 }
 
+MemHandle3D createProjectionArrayHandle(const float *ptr, unsigned int x, unsigned int y, unsigned int z)
+{
+	SDimensions3D dims;
+	dims.iProjU = x;
+	dims.iProjAngles = y;
+	dims.iProjV = z;
+
+	cudaArray* cuArray = allocateProjectionArray(dims);
+	transferHostProjectionsToArray(ptr, cuArray, dims);
+
+	SMemHandle3D_internal h;
+	h.arr = cuArray;
+	h.ptr.ptr = 0;
+
+	MemHandle3D hnd;
+	hnd.d = boost::shared_ptr<SMemHandle3D_internal>(new SMemHandle3D_internal);
+	*hnd.d = h;
+
+	return hnd;
+}
+
+bool copyIntoArray(MemHandle3D handle, MemHandle3D subdata, const SSubDimensions3D &pos)
+{
+	assert(handle.d->arr);
+	assert(!handle.d->ptr.ptr);
+	assert(!subdata.d->arr);
+	assert(subdata.d->ptr.ptr);
+
+	ASTRA_DEBUG("Copying %d x %d x %d into GPU array", pos.subnx, pos.subny, pos.subnz);
+	ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz);
+	ASTRA_DEBUG("Pitch %d, xsize %d, ysize %d", subdata.d->ptr.pitch, subdata.d->ptr.xsize, subdata.d->ptr.ysize);
+
+	cudaMemcpy3DParms p;
+	p.srcArray = 0;
+	p.srcPos = make_cudaPos(0, 0, 0);
+	p.srcPtr = subdata.d->ptr;
+
+	p.dstArray = handle.d->arr;
+	p.dstPos = make_cudaPos(pos.subx, pos.suby, pos.subz);
+	p.dstPtr.ptr = 0;
+
+	p.extent = make_cudaExtent(pos.subnx, pos.subny, pos.subnz);
+
+	p.kind = cudaMemcpyHostToDevice;
+
+	cudaError_t err = cudaMemcpy3D(&p);
+
+	return err == cudaSuccess;
+
+}
+
 
 
 }
diff --git a/cuda/3d/util3d.cu b/cuda/3d/util3d.cu
index 90aa5ea..41eb9d2 100644
--- a/cuda/3d/util3d.cu
+++ b/cuda/3d/util3d.cu
@@ -386,6 +386,41 @@ bool transferProjectionsToArray(cudaPitchedPtr D_projData, cudaArray* array, con
 
 	return true;
 }
+bool transferHostProjectionsToArray(const float *projData, cudaArray* array, const SDimensions3D& dims)
+{
+	cudaExtent extentA;
+	extentA.width = dims.iProjU;
+	extentA.height = dims.iProjAngles;
+	extentA.depth = dims.iProjV;
+
+	cudaPitchedPtr ptr;
+	ptr.ptr = (void*)projData; // const cast away
+	ptr.pitch = dims.iProjU*sizeof(float);
+	ptr.xsize = dims.iProjU*sizeof(float);
+	ptr.ysize = dims.iProjAngles;
+
+	cudaMemcpy3DParms p;
+	cudaPos zp = {0, 0, 0};
+	p.srcArray = 0;
+	p.srcPos = zp;
+	p.srcPtr = ptr;
+	p.dstArray = array;
+	p.dstPtr.ptr = 0;
+	p.dstPtr.pitch = 0;
+	p.dstPtr.xsize = 0;
+	p.dstPtr.ysize = 0;
+	p.dstPos = zp;
+	p.extent = extentA;
+	p.kind = cudaMemcpyHostToDevice;
+
+	cudaError err = cudaMemcpy3D(&p);
+	ASTRA_CUDA_ASSERT(err);
+
+	// TODO: check errors
+
+	return true;
+}
+
 
 
 float dotProduct3D(cudaPitchedPtr data, unsigned int x, unsigned int y,
diff --git a/include/astra/Float32Data3DGPU.h b/include/astra/Float32Data3DGPU.h
index cac38f6..b2cdcda 100644
--- a/include/astra/Float32Data3DGPU.h
+++ b/include/astra/Float32Data3DGPU.h
@@ -38,9 +38,6 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
 namespace astra {
 
 
-astraCUDA3d::MemHandle3D wrapHandle(float *D_ptr, unsigned int x, unsigned int y, unsigned int z, unsigned int pitch);
-
-
 /** 
  * This class represents a 3-dimensional block of 32-bit floating point data.
  * The data block is stored on a GPU, and owned by external code.
diff --git a/include/astra/cuda/3d/astra3d.h b/include/astra/cuda/3d/astra3d.h
index 0ea752d..28a8f01 100644
--- a/include/astra/cuda/3d/astra3d.h
+++ b/include/astra/cuda/3d/astra3d.h
@@ -42,6 +42,7 @@ class CParallelVecProjectionGeometry3D;
 class CConeProjectionGeometry3D;
 class CConeVecProjectionGeometry3D;
 class CVolumeGeometry3D;
+class CFloat32ProjectionData3DGPU;
 class AstraSIRT3d_internal;
 
 using astraCUDA3d::Cuda3DProjectionKernel;
@@ -308,6 +309,12 @@ _AstraExport bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProje
                       const CProjectionGeometry3D* pProjGeom,
                       int iGPUIndex, int iVoxelSuperSampling);
 
+_AstraExport void uploadMultipleProjections(CFloat32ProjectionData3DGPU *proj,
+                                            const float *data,
+                                            unsigned int y_min,
+                                            unsigned int y_max);
+
+
 }
 
 
diff --git a/include/astra/cuda/3d/mem3d.h b/include/astra/cuda/3d/mem3d.h
index 78e1294..8c3956e 100644
--- a/include/astra/cuda/3d/mem3d.h
+++ b/include/astra/cuda/3d/mem3d.h
@@ -37,6 +37,17 @@ class CVolumeGeometry3D;
 class CProjectionGeometry3D;	
 }
 
+
+// MemHandle3D defines a very basic opaque interface to GPU memory pointers.
+// Its intended use is allowing ASTRA code to pass around GPU pointers without
+// requiring CUDA headers.
+//
+// It generally wraps CUDA linear global memory.
+//
+// As a very basic extension, it also allows wrapping a CUDA 3D array.
+// This extension (only) allows creating a CUDA 3D array, copying projection
+// data into it, performing a BP from the array, and freeing the array.
+
 namespace astraCUDA3d {
 
 // TODO: Make it possible to delete these handles when they're no longer
@@ -80,6 +91,7 @@ enum Mem3DZeroMode {
 int maxBlockDimension();
 
 _AstraExport MemHandle3D wrapHandle(float *D_ptr, unsigned int x, unsigned int y, unsigned int z, unsigned int pitch);
+MemHandle3D createProjectionArrayHandle(const float *ptr, unsigned int x, unsigned int y, unsigned int z);
 
 MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero);
 
@@ -93,6 +105,8 @@ bool zeroGPUMemory(MemHandle3D handle, unsigned int x, unsigned int y, unsigned
 
 bool setGPUIndex(int index);
 
+bool copyIntoArray(MemHandle3D handle, MemHandle3D subdata, const SSubDimensions3D &pos);
+
 
 bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel);
 
diff --git a/include/astra/cuda/3d/util3d.h b/include/astra/cuda/3d/util3d.h
index 17eb31e..0146d2d 100644
--- a/include/astra/cuda/3d/util3d.h
+++ b/include/astra/cuda/3d/util3d.h
@@ -51,6 +51,7 @@ bool duplicateProjectionData(cudaPitchedPtr& D_dest, const cudaPitchedPtr& D_src
 
 
 bool transferProjectionsToArray(cudaPitchedPtr D_projData, cudaArray* array, const SDimensions3D& dims);
+bool transferHostProjectionsToArray(const float *projData, cudaArray* array, const SDimensions3D& dims);
 bool transferVolumeToArray(cudaPitchedPtr D_volumeData, cudaArray* array, const SDimensions3D& dims);
 bool zeroProjectionArray(cudaArray* array, const SDimensions3D& dims);
 bool zeroVolumeArray(cudaArray* array, const SDimensions3D& dims);
-- 
cgit v1.2.3