From 9c4222b4cab810815a0adee0302501471da177aa Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Tue, 23 Oct 2018 10:19:32 +0200 Subject: Add minimal GPU Array interface This extension (only) allows creating a CUDA 3D array, copying projection data into it, performing a BP from the array, and freeing the array. --- cuda/3d/astra3d.cu | 45 +++++++++++++++++++ cuda/3d/mem3d.cu | 95 ++++++++++++++++++++++++++++++++++++---- cuda/3d/util3d.cu | 35 +++++++++++++++ include/astra/Float32Data3DGPU.h | 3 -- include/astra/cuda/3d/astra3d.h | 7 +++ include/astra/cuda/3d/mem3d.h | 14 ++++++ include/astra/cuda/3d/util3d.h | 1 + 7 files changed, 189 insertions(+), 11 deletions(-) diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 23af36a..be258be 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -35,12 +35,15 @@ along with the ASTRA Toolbox. If not, see . #include "astra/cuda/3d/fdk.h" #include "astra/cuda/3d/arith3d.h" #include "astra/cuda/3d/astra3d.h" +#include "astra/cuda/3d/mem3d.h" #include "astra/ParallelProjectionGeometry3D.h" #include "astra/ParallelVecProjectionGeometry3D.h" #include "astra/ConeProjectionGeometry3D.h" #include "astra/ConeVecProjectionGeometry3D.h" #include "astra/VolumeGeometry3D.h" +#include "astra/Float32ProjectionData3DGPU.h" +#include "astra/Logging.h" #include #include @@ -1317,4 +1320,46 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, } +_AstraExport void uploadMultipleProjections(CFloat32ProjectionData3DGPU *proj, + const float *data, + unsigned int y_min, unsigned int y_max) +{ + astraCUDA3d::MemHandle3D hnd = proj->getHandle(); + + astraCUDA3d::SDimensions3D dims1; + dims1.iProjU = proj->getDetectorColCount(); + dims1.iProjV = proj->getDetectorRowCount(); + dims1.iProjAngles = y_max - y_min + 1; + + cudaPitchedPtr D_proj = allocateProjectionData(dims1); + bool ok = copyProjectionsToDevice(data, D_proj, dims1); + if (!ok) + ASTRA_ERROR("Failed to upload projection to GPU"); + + astraCUDA3d::MemHandle3D hnd1 = astraCUDA3d::wrapHandle( + (float *)D_proj.ptr, + dims1.iProjU, dims1.iProjAngles, dims1.iProjV, + D_proj.pitch / sizeof(float)); + + astraCUDA3d::SSubDimensions3D subdims; + subdims.nx = dims1.iProjU; + subdims.ny = proj->getAngleCount(); + subdims.nz = dims1.iProjV; + subdims.pitch = D_proj.pitch / sizeof(float); // FIXME: Pitch for wrong obj! + subdims.subnx = dims1.iProjU; + subdims.subny = dims1.iProjAngles; + subdims.subnz = dims1.iProjV; + subdims.subx = 0; + subdims.suby = y_min; + subdims.subz = 0; + + ok = astraCUDA3d::copyIntoArray(hnd, hnd1, subdims); + if (!ok) + ASTRA_ERROR("Failed to copy projection into 3d data"); + + cudaFree(D_proj.ptr); + +} + + } diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu index fe3e723..697d2d2 100644 --- a/cuda/3d/mem3d.cu +++ b/cuda/3d/mem3d.cu @@ -49,6 +49,7 @@ namespace astraCUDA3d { struct SMemHandle3D_internal { cudaPitchedPtr ptr; + cudaArray *arr; unsigned int nx; unsigned int ny; unsigned int nz; @@ -79,6 +80,7 @@ MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Me hnd.nx = x; hnd.ny = y; hnd.nz = z; + hnd.arr = 0; size_t free = astraCUDA::availableGPUMemory(); @@ -113,6 +115,7 @@ MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Me bool zeroGPUMemory(MemHandle3D handle, unsigned int x, unsigned int y, unsigned int z) { SMemHandle3D_internal& hnd = *handle.d.get(); + assert(!hnd.arr); cudaError_t err = cudaMemset3D(hnd.ptr, 0, make_cudaExtent(sizeof(float)*x, y, z)); return err == cudaSuccess; } @@ -120,7 +123,11 @@ bool zeroGPUMemory(MemHandle3D handle, unsigned int x, unsigned int y, unsigned bool freeGPUMemory(MemHandle3D handle) { size_t free = astraCUDA::availableGPUMemory(); - cudaError_t err = cudaFree(handle.d->ptr.ptr); + cudaError_t err; + if (handle.d->arr) + err = cudaFreeArray(handle.d->arr); + else + err = cudaFree(handle.d->ptr.ptr); size_t free2 = astraCUDA::availableGPUMemory(); ASTRA_DEBUG("Freeing memory. (Pre: %lu, post: %lu)", free, free2); @@ -132,6 +139,7 @@ bool copyToGPUMemory(const float *src, MemHandle3D dst, const SSubDimensions3D & { ASTRA_DEBUG("Copying %d x %d x %d to GPU", pos.subnx, pos.subny, pos.subnz); ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz); + assert(!dst.d->arr); cudaPitchedPtr s; s.ptr = (void*)src; // const cast away s.pitch = pos.pitch * sizeof(float); @@ -162,6 +170,7 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos) { ASTRA_DEBUG("Copying %d x %d x %d from GPU", pos.subnx, pos.subny, pos.subnz); ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz); + assert(!src.d->arr); cudaPitchedPtr d; d.ptr = (void*)dst; d.pitch = pos.pitch * sizeof(float); @@ -170,15 +179,21 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos) ASTRA_DEBUG("Pitch %d, xsize %d, ysize %d", d.pitch, d.xsize, d.ysize); cudaMemcpy3DParms p; - p.srcArray = 0; p.srcPos = make_cudaPos(0, 0, 0); - p.srcPtr = src.d->ptr; p.dstArray = 0; p.dstPos = make_cudaPos(pos.subx * sizeof(float), pos.suby, pos.subz); p.dstPtr = d; - p.extent = make_cudaExtent(pos.subnx * sizeof(float), pos.subny, pos.subnz); + if (src.d->ptr.ptr) { + p.srcArray = 0; + p.srcPtr = src.d->ptr; + p.extent = make_cudaExtent(pos.subnx * sizeof(float), pos.subny, pos.subnz); + } else { + p.srcArray = src.d->arr; + p.srcPtr.ptr = 0; + p.extent = make_cudaExtent(pos.subnx, pos.subny, pos.subnz); + } p.kind = cudaMemcpyDeviceToHost; @@ -191,6 +206,8 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos) bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel) { + assert(!projData.d->arr); + assert(!volData.d->arr); SDimensions3D dims; SProjectorParams3D params; @@ -253,6 +270,7 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling, bool bFDKWeighting) { + assert(!volData.d->arr); SDimensions3D dims; SProjectorParams3D params; @@ -273,10 +291,17 @@ bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con params.bFDKWeighting = bFDKWeighting; - if (pParProjs) - ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, params); - else - ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, params); + if (pParProjs) { + if (projData.d->arr) + ok &= Par3DBP_Array(volData.d->ptr, projData.d->arr, dims, pParProjs, params); + else + ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, params); + } else { + if (projData.d->arr) + ok &= ConeBP_Array(volData.d->ptr, projData.d->arr, dims, pConeProjs, params); + else + ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, params); + } delete[] pParProjs; delete[] pConeProjs; @@ -287,6 +312,8 @@ bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan, const float *pfFilter) { + assert(!projData.d->arr); + assert(!volData.d->arr); SDimensions3D dims; SProjectorParams3D params; @@ -328,6 +355,7 @@ _AstraExport MemHandle3D wrapHandle(float *D_ptr, unsigned int x, unsigned int y SMemHandle3D_internal h; h.ptr = ptr; + h.arr = 0; MemHandle3D hnd; hnd.d = boost::shared_ptr(new SMemHandle3D_internal); @@ -336,6 +364,57 @@ _AstraExport MemHandle3D wrapHandle(float *D_ptr, unsigned int x, unsigned int y return hnd; } +MemHandle3D createProjectionArrayHandle(const float *ptr, unsigned int x, unsigned int y, unsigned int z) +{ + SDimensions3D dims; + dims.iProjU = x; + dims.iProjAngles = y; + dims.iProjV = z; + + cudaArray* cuArray = allocateProjectionArray(dims); + transferHostProjectionsToArray(ptr, cuArray, dims); + + SMemHandle3D_internal h; + h.arr = cuArray; + h.ptr.ptr = 0; + + MemHandle3D hnd; + hnd.d = boost::shared_ptr(new SMemHandle3D_internal); + *hnd.d = h; + + return hnd; +} + +bool copyIntoArray(MemHandle3D handle, MemHandle3D subdata, const SSubDimensions3D &pos) +{ + assert(handle.d->arr); + assert(!handle.d->ptr.ptr); + assert(!subdata.d->arr); + assert(subdata.d->ptr.ptr); + + ASTRA_DEBUG("Copying %d x %d x %d into GPU array", pos.subnx, pos.subny, pos.subnz); + ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz); + ASTRA_DEBUG("Pitch %d, xsize %d, ysize %d", subdata.d->ptr.pitch, subdata.d->ptr.xsize, subdata.d->ptr.ysize); + + cudaMemcpy3DParms p; + p.srcArray = 0; + p.srcPos = make_cudaPos(0, 0, 0); + p.srcPtr = subdata.d->ptr; + + p.dstArray = handle.d->arr; + p.dstPos = make_cudaPos(pos.subx, pos.suby, pos.subz); + p.dstPtr.ptr = 0; + + p.extent = make_cudaExtent(pos.subnx, pos.subny, pos.subnz); + + p.kind = cudaMemcpyHostToDevice; + + cudaError_t err = cudaMemcpy3D(&p); + + return err == cudaSuccess; + +} + } diff --git a/cuda/3d/util3d.cu b/cuda/3d/util3d.cu index 90aa5ea..41eb9d2 100644 --- a/cuda/3d/util3d.cu +++ b/cuda/3d/util3d.cu @@ -386,6 +386,41 @@ bool transferProjectionsToArray(cudaPitchedPtr D_projData, cudaArray* array, con return true; } +bool transferHostProjectionsToArray(const float *projData, cudaArray* array, const SDimensions3D& dims) +{ + cudaExtent extentA; + extentA.width = dims.iProjU; + extentA.height = dims.iProjAngles; + extentA.depth = dims.iProjV; + + cudaPitchedPtr ptr; + ptr.ptr = (void*)projData; // const cast away + ptr.pitch = dims.iProjU*sizeof(float); + ptr.xsize = dims.iProjU*sizeof(float); + ptr.ysize = dims.iProjAngles; + + cudaMemcpy3DParms p; + cudaPos zp = {0, 0, 0}; + p.srcArray = 0; + p.srcPos = zp; + p.srcPtr = ptr; + p.dstArray = array; + p.dstPtr.ptr = 0; + p.dstPtr.pitch = 0; + p.dstPtr.xsize = 0; + p.dstPtr.ysize = 0; + p.dstPos = zp; + p.extent = extentA; + p.kind = cudaMemcpyHostToDevice; + + cudaError err = cudaMemcpy3D(&p); + ASTRA_CUDA_ASSERT(err); + + // TODO: check errors + + return true; +} + float dotProduct3D(cudaPitchedPtr data, unsigned int x, unsigned int y, diff --git a/include/astra/Float32Data3DGPU.h b/include/astra/Float32Data3DGPU.h index cac38f6..b2cdcda 100644 --- a/include/astra/Float32Data3DGPU.h +++ b/include/astra/Float32Data3DGPU.h @@ -38,9 +38,6 @@ along with the ASTRA Toolbox. If not, see . namespace astra { -astraCUDA3d::MemHandle3D wrapHandle(float *D_ptr, unsigned int x, unsigned int y, unsigned int z, unsigned int pitch); - - /** * This class represents a 3-dimensional block of 32-bit floating point data. * The data block is stored on a GPU, and owned by external code. diff --git a/include/astra/cuda/3d/astra3d.h b/include/astra/cuda/3d/astra3d.h index 0ea752d..28a8f01 100644 --- a/include/astra/cuda/3d/astra3d.h +++ b/include/astra/cuda/3d/astra3d.h @@ -42,6 +42,7 @@ class CParallelVecProjectionGeometry3D; class CConeProjectionGeometry3D; class CConeVecProjectionGeometry3D; class CVolumeGeometry3D; +class CFloat32ProjectionData3DGPU; class AstraSIRT3d_internal; using astraCUDA3d::Cuda3DProjectionKernel; @@ -308,6 +309,12 @@ _AstraExport bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProje const CProjectionGeometry3D* pProjGeom, int iGPUIndex, int iVoxelSuperSampling); +_AstraExport void uploadMultipleProjections(CFloat32ProjectionData3DGPU *proj, + const float *data, + unsigned int y_min, + unsigned int y_max); + + } diff --git a/include/astra/cuda/3d/mem3d.h b/include/astra/cuda/3d/mem3d.h index 78e1294..8c3956e 100644 --- a/include/astra/cuda/3d/mem3d.h +++ b/include/astra/cuda/3d/mem3d.h @@ -37,6 +37,17 @@ class CVolumeGeometry3D; class CProjectionGeometry3D; } + +// MemHandle3D defines a very basic opaque interface to GPU memory pointers. +// Its intended use is allowing ASTRA code to pass around GPU pointers without +// requiring CUDA headers. +// +// It generally wraps CUDA linear global memory. +// +// As a very basic extension, it also allows wrapping a CUDA 3D array. +// This extension (only) allows creating a CUDA 3D array, copying projection +// data into it, performing a BP from the array, and freeing the array. + namespace astraCUDA3d { // TODO: Make it possible to delete these handles when they're no longer @@ -80,6 +91,7 @@ enum Mem3DZeroMode { int maxBlockDimension(); _AstraExport MemHandle3D wrapHandle(float *D_ptr, unsigned int x, unsigned int y, unsigned int z, unsigned int pitch); +MemHandle3D createProjectionArrayHandle(const float *ptr, unsigned int x, unsigned int y, unsigned int z); MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero); @@ -93,6 +105,8 @@ bool zeroGPUMemory(MemHandle3D handle, unsigned int x, unsigned int y, unsigned bool setGPUIndex(int index); +bool copyIntoArray(MemHandle3D handle, MemHandle3D subdata, const SSubDimensions3D &pos); + bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel); diff --git a/include/astra/cuda/3d/util3d.h b/include/astra/cuda/3d/util3d.h index 17eb31e..0146d2d 100644 --- a/include/astra/cuda/3d/util3d.h +++ b/include/astra/cuda/3d/util3d.h @@ -51,6 +51,7 @@ bool duplicateProjectionData(cudaPitchedPtr& D_dest, const cudaPitchedPtr& D_src bool transferProjectionsToArray(cudaPitchedPtr D_projData, cudaArray* array, const SDimensions3D& dims); +bool transferHostProjectionsToArray(const float *projData, cudaArray* array, const SDimensions3D& dims); bool transferVolumeToArray(cudaPitchedPtr D_volumeData, cudaArray* array, const SDimensions3D& dims); bool zeroProjectionArray(cudaArray* array, const SDimensions3D& dims); bool zeroVolumeArray(cudaArray* array, const SDimensions3D& dims); -- cgit v1.2.3