summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--cuda/2d/astra.cu3
-rw-r--r--cuda/2d/fft.cu46
-rw-r--r--cuda/3d/algo3d.cu21
-rw-r--r--cuda/3d/algo3d.h5
-rw-r--r--cuda/3d/astra3d.cu215
-rw-r--r--cuda/3d/astra3d.h18
-rw-r--r--cuda/3d/cgls3d.cu2
-rw-r--r--cuda/3d/cone_bp.cu40
-rw-r--r--cuda/3d/cone_bp.h4
-rw-r--r--cuda/3d/cone_fp.cu95
-rw-r--r--cuda/3d/cone_fp.h4
-rw-r--r--cuda/3d/dims3d.h24
-rw-r--r--cuda/3d/fdk.cu289
-rw-r--r--cuda/3d/fdk.h8
-rw-r--r--cuda/3d/mem3d.cu55
-rw-r--r--cuda/3d/mem3d.h1
-rw-r--r--cuda/3d/par3d_bp.cu30
-rw-r--r--cuda/3d/par3d_bp.h4
-rw-r--r--cuda/3d/par3d_fp.cu156
-rw-r--r--cuda/3d/par3d_fp.h6
-rw-r--r--cuda/3d/sirt3d.cu2
-rw-r--r--include/astra/CompositeGeometryManager.h11
-rw-r--r--include/astra/GeometryUtil3D.h52
-rw-r--r--include/astra/Singleton.h6
-rw-r--r--include/astra/Utilities.h3
-rw-r--r--matlab/mex/mexHelpFunctions.cpp28
-rw-r--r--python/astra/src/PythonPluginAlgorithm.cpp10
-rw-r--r--src/CompositeGeometryManager.cpp373
-rw-r--r--src/CudaFDKAlgorithm3D.cpp32
-rw-r--r--src/Utilities.cpp34
30 files changed, 863 insertions, 714 deletions
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu
index 4c69628..b56ddf9 100644
--- a/cuda/2d/astra.cu
+++ b/cuda/2d/astra.cu
@@ -341,10 +341,9 @@ bool AstraFBP::run()
dims3d.iProjAngles = pData->dims.iProjAngles;
dims3d.iProjU = pData->dims.iProjDets;
dims3d.iProjV = 1;
- dims3d.iRaysPerDetDim = dims3d.iRaysPerVoxelDim = 1;
astraCUDA3d::FDK_PreWeight(tmp, pData->fOriginSourceDistance,
- pData->fOriginDetectorDistance, 0.0f, 0.0f,
+ pData->fOriginDetectorDistance, 0.0f,
pData->dims.fDetScale, 1.0f, // TODO: Are these correct?
pData->bShortScan, dims3d, pData->angles);
}
diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu
index 2bfd493..3dd1c22 100644
--- a/cuda/2d/fft.cu
+++ b/cuda/2d/fft.cu
@@ -35,6 +35,7 @@ $Id$
#include <fstream>
#include "../../include/astra/Logging.h"
+#include "../../include/astra/Fourier.h"
using namespace astra;
@@ -303,16 +304,48 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,
float * pfFilt = new float[_iFFTFourierDetectorCount];
float * pfW = new float[_iFFTFourierDetectorCount];
+ // We cache one Fourier transform for repeated FBP's of the same size
+ static float *pfData = 0;
+ static int iFilterCacheSize = 0;
+
+ if (!pfData || iFilterCacheSize != _iFFTRealDetectorCount) {
+ // Compute filter in spatial domain
+
+ delete[] pfData;
+ pfData = new float[2*_iFFTRealDetectorCount];
+ int *ip = new int[int(2+sqrt(_iFFTRealDetectorCount)+1)];
+ ip[0] = 0;
+ float32 *w = new float32[_iFFTRealDetectorCount/2];
+
+ for (int i = 0; i < _iFFTRealDetectorCount; ++i) {
+ pfData[2*i+1] = 0.0f;
+
+ if (i & 1) {
+ int j = i;
+ if (2*j > _iFFTRealDetectorCount)
+ j = _iFFTRealDetectorCount - j;
+ float f = M_PI * j;
+ pfData[2*i] = -1 / (f*f);
+ } else {
+ pfData[2*i] = 0.0f;
+ }
+ }
+
+ pfData[0] = 0.25f;
+
+ cdft(2*_iFFTRealDetectorCount, -1, pfData, ip, w);
+ delete[] ip;
+ delete[] w;
+
+ iFilterCacheSize = _iFFTRealDetectorCount;
+ }
+
for(int iDetectorIndex = 0; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++)
{
float fRelIndex = (float)iDetectorIndex / (float)_iFFTRealDetectorCount;
- // filt = 2*( 0:(order/2) )./order;
- pfFilt[iDetectorIndex] = 2.0f * fRelIndex;
- //pfFilt[iDetectorIndex] = 1.0f;
-
- // w = 2*pi*(0:size(filt,2)-1)/order
- pfW[iDetectorIndex] = 3.1415f * 2.0f * fRelIndex;
+ pfFilt[iDetectorIndex] = 2.0f * pfData[2*iDetectorIndex];
+ pfW[iDetectorIndex] = M_PI * 2.0f * fRelIndex;
}
switch(_eFilter)
@@ -866,5 +899,4 @@ void downloadDebugFilterReal(float * _pfHostSinogram, int _iProjectionCount,
free(pfHostFilter);
}
-
#endif
diff --git a/cuda/3d/algo3d.cu b/cuda/3d/algo3d.cu
index cc86b70..e2c1e43 100644
--- a/cuda/3d/algo3d.cu
+++ b/cuda/3d/algo3d.cu
@@ -41,7 +41,6 @@ ReconAlgo3D::ReconAlgo3D()
coneProjs = 0;
par3DProjs = 0;
shouldAbort = false;
- fOutputScale = 1.0f;
}
ReconAlgo3D::~ReconAlgo3D()
@@ -58,10 +57,10 @@ void ReconAlgo3D::reset()
shouldAbort = false;
}
-bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles, float _outputScale)
+bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles, const SProjectorParams3D& _params)
{
dims = _dims;
- fOutputScale = _outputScale;
+ params = _params;
coneProjs = new SConeProjection[dims.iProjAngles];
par3DProjs = 0;
@@ -71,10 +70,10 @@ bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProject
return true;
}
-bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles, float _outputScale)
+bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles, const SProjectorParams3D& _params)
{
dims = _dims;
- fOutputScale = _outputScale;
+ params = _params;
par3DProjs = new SPar3DProjection[dims.iProjAngles];
coneProjs = 0;
@@ -89,10 +88,12 @@ bool ReconAlgo3D::callFP(cudaPitchedPtr& D_volumeData,
cudaPitchedPtr& D_projData,
float outputScale)
{
+ SProjectorParams3D p = params;
+ p.fOutputScale *= outputScale;
if (coneProjs) {
- return ConeFP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale);
+ return ConeFP(D_volumeData, D_projData, dims, coneProjs, p);
} else {
- return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale);
+ return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, p);
}
}
@@ -100,10 +101,12 @@ bool ReconAlgo3D::callBP(cudaPitchedPtr& D_volumeData,
cudaPitchedPtr& D_projData,
float outputScale)
{
+ SProjectorParams3D p = params;
+ p.fOutputScale *= outputScale;
if (coneProjs) {
- return ConeBP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale);
+ return ConeBP(D_volumeData, D_projData, dims, coneProjs, p);
} else {
- return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale);
+ return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, p);
}
}
diff --git a/cuda/3d/algo3d.h b/cuda/3d/algo3d.h
index 886b092..ce331ad 100644
--- a/cuda/3d/algo3d.h
+++ b/cuda/3d/algo3d.h
@@ -39,8 +39,8 @@ public:
ReconAlgo3D();
~ReconAlgo3D();
- bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs, float fOutputScale);
- bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs, float fOutputScale);
+ bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs, const SProjectorParams3D& params);
+ bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs, const SProjectorParams3D& params);
void signalAbort() { shouldAbort = true; }
@@ -55,6 +55,7 @@ protected:
float outputScale);
SDimensions3D dims;
+ SProjectorParams3D params;
SConeProjection* coneProjs;
SPar3DProjection* par3DProjs;
diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index 35e3cd4..3bd56ea 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -67,34 +67,41 @@ template<typename ProjectionT>
static bool convertAstraGeometry_internal(const CVolumeGeometry3D* pVolGeom,
unsigned int iProjectionAngleCount,
ProjectionT*& pProjs,
- float& fOutputScale)
+ SProjectorParams3D& params)
{
assert(pVolGeom);
assert(pProjs);
+#if 0
// TODO: Relative instead of absolute
const float EPS = 0.00001f;
if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthY()) > EPS)
return false;
if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthZ()) > EPS)
return false;
-
+#endif
// Translate
float dx = -(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2;
float dy = -(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2;
float dz = -(pVolGeom->getWindowMinZ() + pVolGeom->getWindowMaxZ()) / 2;
- float factor = 1.0f / pVolGeom->getPixelLengthX();
+ float fx = 1.0f / pVolGeom->getPixelLengthX();
+ float fy = 1.0f / pVolGeom->getPixelLengthY();
+ float fz = 1.0f / pVolGeom->getPixelLengthZ();
for (int i = 0; i < iProjectionAngleCount; ++i) {
// CHECKME: Order of scaling and translation
pProjs[i].translate(dx, dy, dz);
- pProjs[i].scale(factor);
+ pProjs[i].scale(fx, fy, fz);
}
+ params.fVolScaleX = pVolGeom->getPixelLengthX();
+ params.fVolScaleY = pVolGeom->getPixelLengthY();
+ params.fVolScaleZ = pVolGeom->getPixelLengthZ();
+
// CHECKME: Check factor
- fOutputScale *= pVolGeom->getPixelLengthX();
+ //params.fOutputScale *= pVolGeom->getPixelLengthX();
return true;
}
@@ -108,10 +115,8 @@ bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom,
dims.iVolY = pVolGeom->getGridRowCount();
dims.iVolZ = pVolGeom->getGridSliceCount();
dims.iProjAngles = pProjGeom->getProjectionCount();
- dims.iProjU = pProjGeom->getDetectorColCount(),
- dims.iProjV = pProjGeom->getDetectorRowCount(),
- dims.iRaysPerDetDim = 1;
- dims.iRaysPerVoxelDim = 1;
+ dims.iProjU = pProjGeom->getDetectorColCount();
+ dims.iProjV = pProjGeom->getDetectorRowCount();
if (dims.iVolX <= 0 || dims.iVolX <= 0 || dims.iVolX <= 0)
return false;
@@ -124,7 +129,7 @@ bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom,
bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
const CParallelProjectionGeometry3D* pProjGeom,
- SPar3DProjection*& pProjs, float& fOutputScale)
+ SPar3DProjection*& pProjs, SProjectorParams3D& params)
{
assert(pVolGeom);
assert(pProjGeom);
@@ -141,16 +146,14 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
bool ok;
- fOutputScale = 1.0f;
-
- ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
+ ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params);
return ok;
}
bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
const CParallelVecProjectionGeometry3D* pProjGeom,
- SPar3DProjection*& pProjs, float& fOutputScale)
+ SPar3DProjection*& pProjs, SProjectorParams3D& params)
{
assert(pVolGeom);
assert(pProjGeom);
@@ -164,16 +167,14 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
bool ok;
- fOutputScale = 1.0f;
-
- ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
+ ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params);
return ok;
}
bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
const CConeProjectionGeometry3D* pProjGeom,
- SConeProjection*& pProjs, float& fOutputScale)
+ SConeProjection*& pProjs, SProjectorParams3D& params)
{
assert(pVolGeom);
assert(pProjGeom);
@@ -192,16 +193,14 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
bool ok;
- fOutputScale = 1.0f;
-
- ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
+ ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params);
return ok;
}
bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
const CConeVecProjectionGeometry3D* pProjGeom,
- SConeProjection*& pProjs, float& fOutputScale)
+ SConeProjection*& pProjs, SProjectorParams3D& params)
{
assert(pVolGeom);
assert(pProjGeom);
@@ -215,9 +214,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
bool ok;
- fOutputScale = 1.0f;
-
- ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
+ ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params);
return ok;
}
@@ -227,7 +224,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
const CProjectionGeometry3D* pProjGeom,
SPar3DProjection*& pParProjs,
SConeProjection*& pConeProjs,
- float& fOutputScale)
+ SProjectorParams3D& params)
{
const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom);
const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom);
@@ -240,13 +237,13 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
bool ok;
if (conegeom) {
- ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, fOutputScale);
+ ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, params);
} else if (conevec3dgeom) {
- ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, fOutputScale);
+ ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, params);
} else if (par3dgeom) {
- ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, fOutputScale);
+ ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, params);
} else if (parvec3dgeom) {
- ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, fOutputScale);
+ ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, params);
} else {
ok = false;
}
@@ -260,20 +257,17 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
class AstraSIRT3d_internal {
public:
SDimensions3D dims;
+ SProjectorParams3D params;
CUDAProjectionType3d projType;
float* angles;
float fOriginSourceDistance;
float fOriginDetectorDistance;
- float fSourceZ;
- float fDetSize;
float fRelaxation;
SConeProjection* projs;
SPar3DProjection* parprojs;
- float fOutputScale;
-
bool initialized;
bool setStartReconstruction;
@@ -305,12 +299,9 @@ AstraSIRT3d::AstraSIRT3d()
pData->dims.iProjAngles = 0;
pData->dims.iProjU = 0;
pData->dims.iProjV = 0;
- pData->dims.iRaysPerDetDim = 1;
- pData->dims.iRaysPerVoxelDim = 1;
pData->projs = 0;
pData->parprojs = 0;
- pData->fOutputScale = 1.0f;
pData->fRelaxation = 1.0f;
@@ -361,7 +352,7 @@ bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
ok = convertAstraGeometry(pVolGeom, pProjGeom,
pData->parprojs, pData->projs,
- pData->fOutputScale);
+ pData->params);
if (!ok)
return false;
@@ -386,8 +377,8 @@ bool AstraSIRT3d::enableSuperSampling(unsigned int iVoxelSuperSampling,
if (iVoxelSuperSampling == 0 || iDetectorSuperSampling == 0)
return false;
- pData->dims.iRaysPerVoxelDim = iVoxelSuperSampling;
- pData->dims.iRaysPerDetDim = iDetectorSuperSampling;
+ pData->params.iRaysPerVoxelDim = iVoxelSuperSampling;
+ pData->params.iRaysPerDetDim = iDetectorSuperSampling;
return true;
}
@@ -447,9 +438,9 @@ bool AstraSIRT3d::init()
bool ok;
if (pData->projType == PROJ_PARALLEL) {
- ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale);
+ ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs, pData->params);
} else {
- ok = pData->sirt.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale);
+ ok = pData->sirt.setConeGeometry(pData->dims, pData->projs, pData->params);
}
if (!ok)
@@ -652,19 +643,16 @@ float AstraSIRT3d::computeDiffNorm()
class AstraCGLS3d_internal {
public:
SDimensions3D dims;
+ SProjectorParams3D params;
CUDAProjectionType3d projType;
float* angles;
float fOriginSourceDistance;
float fOriginDetectorDistance;
- float fSourceZ;
- float fDetSize;
SConeProjection* projs;
SPar3DProjection* parprojs;
- float fOutputScale;
-
bool initialized;
bool setStartReconstruction;
@@ -696,12 +684,9 @@ AstraCGLS3d::AstraCGLS3d()
pData->dims.iProjAngles = 0;
pData->dims.iProjU = 0;
pData->dims.iProjV = 0;
- pData->dims.iRaysPerDetDim = 1;
- pData->dims.iRaysPerVoxelDim = 1;
pData->projs = 0;
pData->parprojs = 0;
- pData->fOutputScale = 1.0f;
pData->initialized = false;
pData->setStartReconstruction = false;
@@ -750,7 +735,7 @@ bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
ok = convertAstraGeometry(pVolGeom, pProjGeom,
pData->parprojs, pData->projs,
- pData->fOutputScale);
+ pData->params);
if (!ok)
return false;
@@ -774,8 +759,8 @@ bool AstraCGLS3d::enableSuperSampling(unsigned int iVoxelSuperSampling,
if (iVoxelSuperSampling == 0 || iDetectorSuperSampling == 0)
return false;
- pData->dims.iRaysPerVoxelDim = iVoxelSuperSampling;
- pData->dims.iRaysPerDetDim = iDetectorSuperSampling;
+ pData->params.iRaysPerVoxelDim = iVoxelSuperSampling;
+ pData->params.iRaysPerDetDim = iDetectorSuperSampling;
return true;
}
@@ -829,9 +814,9 @@ bool AstraCGLS3d::init()
bool ok;
if (pData->projType == PROJ_PARALLEL) {
- ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale);
+ ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs, pData->params);
} else {
- ok = pData->cgls.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale);
+ ok = pData->cgls.setConeGeometry(pData->dims, pData->projs, pData->params);
}
if (!ok)
@@ -1039,23 +1024,23 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections,
Cuda3DProjectionKernel projKernel)
{
SDimensions3D dims;
+ SProjectorParams3D params;
+
+ params.iRaysPerDetDim = iDetectorSuperSampling;
bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
if (!ok)
return false;
- dims.iRaysPerDetDim = iDetectorSuperSampling;
if (iDetectorSuperSampling == 0)
return false;
SPar3DProjection* pParProjs;
SConeProjection* pConeProjs;
- float outputScale;
-
ok = convertAstraGeometry(pVolGeom, pProjGeom,
pParProjs, pConeProjs,
- outputScale);
+ params);
if (iGPUIndex != -1) {
@@ -1093,10 +1078,10 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections,
if (pParProjs) {
switch (projKernel) {
case ker3d_default:
- ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, outputScale);
+ ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, params);
break;
case ker3d_sum_square_weights:
- ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, outputScale*outputScale);
+ ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, params);
break;
default:
assert(false);
@@ -1104,7 +1089,7 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections,
} else {
switch (projKernel) {
case ker3d_default:
- ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, outputScale);
+ ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, params);
break;
default:
assert(false);
@@ -1129,21 +1114,20 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections,
int iGPUIndex, int iVoxelSuperSampling)
{
SDimensions3D dims;
+ SProjectorParams3D params;
+
+ params.iRaysPerVoxelDim = iVoxelSuperSampling;
bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
if (!ok)
return false;
- dims.iRaysPerVoxelDim = iVoxelSuperSampling;
-
SPar3DProjection* pParProjs;
SConeProjection* pConeProjs;
- float outputScale;
-
ok = convertAstraGeometry(pVolGeom, pProjGeom,
pParProjs, pConeProjs,
- outputScale);
+ params);
if (iGPUIndex != -1) {
cudaSetDevice(iGPUIndex);
@@ -1179,9 +1163,9 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections,
}
if (pParProjs)
- ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale);
+ ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, params);
else
- ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale);
+ ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, params);
ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX);
@@ -1204,21 +1188,21 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
int iGPUIndex, int iVoxelSuperSampling)
{
SDimensions3D dims;
+ SProjectorParams3D params;
+
+ params.iRaysPerVoxelDim = iVoxelSuperSampling;
bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
if (!ok)
return false;
- dims.iRaysPerVoxelDim = iVoxelSuperSampling;
SPar3DProjection* pParProjs;
SConeProjection* pConeProjs;
- float outputScale;
-
ok = convertAstraGeometry(pVolGeom, pProjGeom,
pParProjs, pConeProjs,
- outputScale);
+ params);
if (iGPUIndex != -1) {
cudaSetDevice(iGPUIndex);
@@ -1255,9 +1239,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
processSino3D<opSet>(D_projData, 1.0f, dims);
if (pParProjs)
- ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, outputScale);
+ ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, params);
else
- ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, outputScale);
+ ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, params);
processVol3D<opInvert>(D_pixelWeight, dims);
if (!ok) {
@@ -1272,9 +1256,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
ok &= zeroVolumeData(D_volumeData, dims);
// Do BP into D_volumeData
if (pParProjs)
- ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale);
+ ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, params);
else
- ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale);
+ ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, params);
// Multiply with weights
processVol3D<opMul>(D_volumeData, D_pixelWeight, dims);
@@ -1305,83 +1289,4 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
}
-
-
-bool astraCudaFDK(float* pfVolume, const float* pfProjections,
- const CVolumeGeometry3D* pVolGeom,
- const CConeProjectionGeometry3D* pProjGeom,
- bool bShortScan,
- int iGPUIndex, int iVoxelSuperSampling, const float* filter)
-{
- SDimensions3D dims;
-
- bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
-
- // TODO: Check that pVolGeom is normalized, since we don't support
- // other volume geometries yet
-
- if (!ok)
- return false;
-
- dims.iRaysPerVoxelDim = iVoxelSuperSampling;
-
- if (iVoxelSuperSampling == 0)
- return false;
-
- if (iGPUIndex != -1) {
- cudaSetDevice(iGPUIndex);
- cudaError_t err = cudaGetLastError();
-
- // Ignore errors caused by calling cudaSetDevice multiple times
- if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess)
- return false;
- }
-
- cudaPitchedPtr D_volumeData = allocateVolumeData(dims);
- ok = D_volumeData.ptr;
- if (!ok)
- return false;
-
- cudaPitchedPtr D_projData = allocateProjectionData(dims);
- ok = D_projData.ptr;
- if (!ok) {
- cudaFree(D_volumeData.ptr);
- return false;
- }
-
- ok &= copyProjectionsToDevice(pfProjections, D_projData, dims, dims.iProjU);
-
- ok &= zeroVolumeData(D_volumeData, dims);
-
- if (!ok) {
- cudaFree(D_volumeData.ptr);
- cudaFree(D_projData.ptr);
- return false;
- }
-
- float fOriginSourceDistance = pProjGeom->getOriginSourceDistance();
- float fOriginDetectorDistance = pProjGeom->getOriginDetectorDistance();
- float fDetUSize = pProjGeom->getDetectorSpacingX();
- float fDetVSize = pProjGeom->getDetectorSpacingY();
- const float *pfAngles = pProjGeom->getProjectionAngles();
-
-
- // TODO: Offer interface for SrcZ, DetZ
- ok &= FDK(D_volumeData, D_projData, fOriginSourceDistance,
- fOriginDetectorDistance, 0, 0, fDetUSize, fDetVSize,
- dims, pfAngles, bShortScan, filter);
-
- ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX);
-
-
- cudaFree(D_volumeData.ptr);
- cudaFree(D_projData.ptr);
-
- return ok;
-
-}
-
-
-
-
}
diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h
index dde1347..93bf576 100644
--- a/cuda/3d/astra3d.h
+++ b/cuda/3d/astra3d.h
@@ -37,11 +37,6 @@ namespace astra {
// TODO: Switch to a class hierarchy as with the 2D algorithms
-enum Cuda3DProjectionKernel {
- ker3d_default = 0,
- ker3d_sum_square_weights
-};
-
class CProjectionGeometry3D;
class CParallelProjectionGeometry3D;
class CParallelVecProjectionGeometry3D;
@@ -50,6 +45,10 @@ class CConeVecProjectionGeometry3D;
class CVolumeGeometry3D;
class AstraSIRT3d_internal;
+using astraCUDA3d::Cuda3DProjectionKernel;
+using astraCUDA3d::ker3d_default;
+using astraCUDA3d::ker3d_sum_square_weights;
+
class _AstraExport AstraSIRT3d {
public:
@@ -291,7 +290,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
const CProjectionGeometry3D* pProjGeom,
SPar3DProjection*& pParProjs,
SConeProjection*& pConeProjs,
- float& fOutputScale);
+ astraCUDA3d::SProjectorParams3D& params);
_AstraExport bool astraCudaFP(const float* pfVolume, float* pfProjections,
const CVolumeGeometry3D* pVolGeom,
@@ -310,13 +309,6 @@ _AstraExport bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProje
const CProjectionGeometry3D* pProjGeom,
int iGPUIndex, int iVoxelSuperSampling);
-_AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections,
- const CVolumeGeometry3D* pVolGeom,
- const CConeProjectionGeometry3D* pProjGeom,
- bool bShortScan,
- int iGPUIndex, int iVoxelSuperSampling, const float* filter);
-
-
}
diff --git a/cuda/3d/cgls3d.cu b/cuda/3d/cgls3d.cu
index dd0e8a0..0b0331e 100644
--- a/cuda/3d/cgls3d.cu
+++ b/cuda/3d/cgls3d.cu
@@ -242,7 +242,7 @@ bool doCGLS(cudaPitchedPtr& D_volumeData,
CGLS cgls;
bool ok = true;
- ok &= cgls.setConeGeometry(dims, angles, 1.0f);
+ ok &= cgls.setConeGeometry(dims, angles, SProjectorParams3D());
if (D_maskData.ptr)
ok &= cgls.enableVolumeMask();
diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu
index 4a41f6a..6bd9d16 100644
--- a/cuda/3d/cone_bp.cu
+++ b/cuda/3d/cone_bp.cu
@@ -77,6 +77,7 @@ bool bindProjDataTexture(const cudaArray* array)
//__launch_bounds__(32*16, 4)
+template<bool FDKWEIGHT>
__global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle,
int angleOffset, const astraCUDA3d::SDimensions3D dims,
float fOutputScale)
@@ -134,7 +135,10 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng
fU = fUNum * fr;
fV = fVNum * fr;
float fVal = tex3D(gT_coneProjTexture, fU, fAngle, fV);
- Z[idx] += fVal; // fr*fr*fVal;
+ if (FDKWEIGHT)
+ Z[idx] += fr*fr*fVal;
+ else
+ Z[idx] += fVal;
fUNum += fCu.z;
fVNum += fCv.z;
@@ -154,7 +158,7 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng
// supersampling version
-__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)
+__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, int iRaysPerVoxelDim, float fOutputScale)
{
float* volData = (float*)D_volData;
@@ -185,12 +189,12 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start
if (endZ > dims.iVolZ)
endZ = dims.iVolZ;
- float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
- float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
- float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
- const float fSubStep = 1.0f/dims.iRaysPerVoxelDim;
+ float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim;
+ float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim;
+ float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim;
+ const float fSubStep = 1.0f/iRaysPerVoxelDim;
- fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+ fOutputScale /= (iRaysPerVoxelDim*iRaysPerVoxelDim*iRaysPerVoxelDim);
for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f)
@@ -216,11 +220,11 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start
const float fCdc = gC_C[12*angle+11];
float fXs = fX;
- for (int iSubX = 0; iSubX < dims.iRaysPerVoxelDim; ++iSubX) {
+ for (int iSubX = 0; iSubX < iRaysPerVoxelDim; ++iSubX) {
float fYs = fY;
- for (int iSubY = 0; iSubY < dims.iRaysPerVoxelDim; ++iSubY) {
+ for (int iSubY = 0; iSubY < iRaysPerVoxelDim; ++iSubY) {
float fZs = fZ;
- for (int iSubZ = 0; iSubZ < dims.iRaysPerVoxelDim; ++iSubZ) {
+ for (int iSubZ = 0; iSubZ < iRaysPerVoxelDim; ++iSubZ) {
const float fUNum = fCuc + fXs * fCux + fYs * fCuy + fZs * fCuz;
const float fVNum = fCvc + fXs * fCvx + fYs * fCvy + fZs * fCvz;
@@ -248,10 +252,12 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start
bool ConeBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
const SDimensions3D& dims, const SConeProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
bindProjDataTexture(D_projArray);
+ float fOutputScale = params.fOutputScale * params.fVolScaleX * params.fVolScaleY * params.fVolScaleZ;
+
for (unsigned int th = 0; th < dims.iProjAngles; th += g_MaxAngles) {
unsigned int angleCount = g_MaxAngles;
if (th + angleCount > dims.iProjAngles)
@@ -295,10 +301,12 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,
for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {
// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);
- if (dims.iRaysPerVoxelDim == 1)
- dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ if (params.bFDKWeighting)
+ dev_cone_BP<true><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ else if (params.iRaysPerVoxelDim == 1)
+ dev_cone_BP<false><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
else
- dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale);
}
cudaTextForceKernelsCompletion();
@@ -315,14 +323,14 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,
bool ConeBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SConeProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
// transfer projections to array
cudaArray* cuArray = allocateProjectionArray(dims);
transferProjectionsToArray(D_projData, cuArray, dims);
- bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale);
+ bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, params);
cudaFreeArray(cuArray);
diff --git a/cuda/3d/cone_bp.h b/cuda/3d/cone_bp.h
index 4d3d2dd..a6e2c23 100644
--- a/cuda/3d/cone_bp.h
+++ b/cuda/3d/cone_bp.h
@@ -34,12 +34,12 @@ namespace astraCUDA3d {
_AstraExport bool ConeBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
const SDimensions3D& dims, const SConeProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
_AstraExport bool ConeBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SConeProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
}
diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu
index 13b184f..fefcdc1 100644
--- a/cuda/3d/cone_fp.cu
+++ b/cuda/3d/cone_fp.cu
@@ -128,6 +128,18 @@ struct DIR_Z {
__device__ float z(float f0, float f1, float f2) const { return f0; }
};
+struct SCALE_CUBE {
+ float fOutputScale;
+ __device__ float scale(float a1, float a2) const { return sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; }
+};
+
+struct SCALE_NONCUBE {
+ float fScale1;
+ float fScale2;
+ float fOutputScale;
+ __device__ float scale(float a1, float a2) const { return sqrt(a1*a1*fScale1+a2*a2*fScale2+1.0f) * fOutputScale; }
+};
+
// threadIdx: x = ??? detector (u?)
// y = relative angle
@@ -135,11 +147,12 @@ struct DIR_Z {
// blockIdx: x = ??? detector (u+v?)
// y = angle block
-template<class COORD>
+template<class COORD, class SCALE>
__global__ void cone_FP_t(float* D_projData, unsigned int projPitch,
unsigned int startSlice,
unsigned int startAngle, unsigned int endAngle,
- const SDimensions3D dims, float fOutputScale)
+ const SDimensions3D dims,
+ SCALE sc)
{
COORD c;
@@ -188,7 +201,7 @@ __global__ void cone_FP_t(float* D_projData, unsigned int projPitch,
const float b1 = c.c1(fSrcX,fSrcY,fSrcZ) - a1 * c.c0(fSrcX,fSrcY,fSrcZ);
const float b2 = c.c2(fSrcX,fSrcY,fSrcZ) - a2 * c.c0(fSrcX,fSrcY,fSrcZ);
- const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale;
+ const float fDistCorr = sc.scale(a1, a2);
float fVal = 0.0f;
@@ -214,7 +227,8 @@ template<class COORD>
__global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,
unsigned int startSlice,
unsigned int startAngle, unsigned int endAngle,
- const SDimensions3D dims, float fOutputScale)
+ const SDimensions3D dims, int iRaysPerDetDim,
+ SCALE_NONCUBE sc)
{
COORD c;
@@ -245,7 +259,7 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,
if (endSlice > c.nSlices(dims))
endSlice = c.nSlices(dims);
- const float fSubStep = 1.0f/dims.iRaysPerDetDim;
+ const float fSubStep = 1.0f/iRaysPerDetDim;
for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV)
{
@@ -255,9 +269,9 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,
float fV = 0.0f;
float fdU = detectorU - 0.5f + 0.5f*fSubStep;
- for (int iSubU = 0; iSubU < dims.iRaysPerDetDim; ++iSubU, fdU+=fSubStep) {
+ for (int iSubU = 0; iSubU < iRaysPerDetDim; ++iSubU, fdU+=fSubStep) {
float fdV = detectorV - 0.5f + 0.5f*fSubStep;
- for (int iSubV = 0; iSubV < dims.iRaysPerDetDim; ++iSubV, fdV+=fSubStep) {
+ for (int iSubV = 0; iSubV < iRaysPerDetDim; ++iSubV, fdV+=fSubStep) {
const float fDetX = fDetSX + fdU*fDetUX + fdV*fDetVX;
const float fDetY = fDetSY + fdU*fDetUY + fdV*fDetVY;
@@ -272,7 +286,7 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,
const float b1 = c.c1(fSrcX,fSrcY,fSrcZ) - a1 * c.c0(fSrcX,fSrcY,fSrcZ);
const float b2 = c.c2(fSrcX,fSrcY,fSrcZ) - a2 * c.c0(fSrcX,fSrcY,fSrcZ);
- const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale;
+ const float fDistCorr = sc.scale(a1, a2);
float fVal = 0.0f;
@@ -294,14 +308,14 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,
}
}
- D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (dims.iRaysPerDetDim * dims.iRaysPerDetDim);
+ D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (iRaysPerDetDim * iRaysPerDetDim);
}
}
bool ConeFP_Array_internal(cudaPitchedPtr D_projData,
const SDimensions3D& dims, unsigned int angleCount, const SConeProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
// transfer angles to constant memory
float* tmp = new float[angleCount];
@@ -336,6 +350,36 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData,
unsigned int blockEnd = 0;
int blockDirection = 0;
+ bool cube = true;
+ if (abs(params.fVolScaleX / params.fVolScaleY - 1.0) > 0.00001)
+ cube = false;
+ if (abs(params.fVolScaleX / params.fVolScaleZ - 1.0) > 0.00001)
+ cube = false;
+
+ SCALE_CUBE scube;
+ scube.fOutputScale = params.fOutputScale * params.fVolScaleX;
+
+ SCALE_NONCUBE snoncubeX;
+ float fS1 = params.fVolScaleY / params.fVolScaleX;
+ snoncubeX.fScale1 = fS1 * fS1;
+ float fS2 = params.fVolScaleZ / params.fVolScaleX;
+ snoncubeX.fScale2 = fS2 * fS2;
+ snoncubeX.fOutputScale = params.fOutputScale * params.fVolScaleX;
+
+ SCALE_NONCUBE snoncubeY;
+ fS1 = params.fVolScaleX / params.fVolScaleY;
+ snoncubeY.fScale1 = fS1 * fS1;
+ fS2 = params.fVolScaleY / params.fVolScaleY;
+ snoncubeY.fScale2 = fS2 * fS2;
+ snoncubeY.fOutputScale = params.fOutputScale * params.fVolScaleY;
+
+ SCALE_NONCUBE snoncubeZ;
+ fS1 = params.fVolScaleX / params.fVolScaleZ;
+ snoncubeZ.fScale1 = fS1 * fS1;
+ fS2 = params.fVolScaleY / params.fVolScaleZ;
+ snoncubeZ.fScale2 = fS2 * fS2;
+ snoncubeZ.fOutputScale = params.fOutputScale * params.fVolScaleZ;
+
// timeval t;
// tic(t);
@@ -373,22 +417,31 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData,
if (blockDirection == 0) {
for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- cone_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ if (cube)
+ cone_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube);
+ else
+ cone_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeX);
else
- cone_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ cone_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeX);
} else if (blockDirection == 1) {
for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- cone_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ if (cube)
+ cone_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube);
+ else
+ cone_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeY);
else
- cone_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ cone_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeY);
} else if (blockDirection == 2) {
for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- cone_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ if (cube)
+ cone_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube);
+ else
+ cone_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeZ);
else
- cone_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ cone_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeZ);
}
}
@@ -414,7 +467,7 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData,
bool ConeFP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SConeProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
// transfer volume to array
@@ -434,7 +487,7 @@ bool ConeFP(cudaPitchedPtr D_volumeData,
ret = ConeFP_Array_internal(D_subprojData,
dims, iEndAngle - iAngle, angles + iAngle,
- fOutputScale);
+ params);
if (!ret)
break;
}
diff --git a/cuda/3d/cone_fp.h b/cuda/3d/cone_fp.h
index 969a6d8..62c9caa 100644
--- a/cuda/3d/cone_fp.h
+++ b/cuda/3d/cone_fp.h
@@ -34,12 +34,12 @@ namespace astraCUDA3d {
_AstraExport bool ConeFP_Array(cudaArray *D_volArray,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SConeProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
_AstraExport bool ConeFP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SConeProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
}
diff --git a/cuda/3d/dims3d.h b/cuda/3d/dims3d.h
index 5437a85..eb7045f 100644
--- a/cuda/3d/dims3d.h
+++ b/cuda/3d/dims3d.h
@@ -37,6 +37,13 @@ namespace astraCUDA3d {
using astra::SConeProjection;
using astra::SPar3DProjection;
+
+enum Cuda3DProjectionKernel {
+ ker3d_default = 0,
+ ker3d_sum_square_weights
+};
+
+
struct SDimensions3D {
unsigned int iVolX;
unsigned int iVolY;
@@ -44,8 +51,25 @@ struct SDimensions3D {
unsigned int iProjAngles;
unsigned int iProjU; // number of detectors in the U direction
unsigned int iProjV; // number of detectors in the V direction
+};
+
+struct SProjectorParams3D {
+ SProjectorParams3D() :
+ iRaysPerDetDim(1), iRaysPerVoxelDim(1),
+ fOutputScale(1.0f),
+ fVolScaleX(1.0f), fVolScaleY(1.0f), fVolScaleZ(1.0f),
+ ker(ker3d_default),
+ bFDKWeighting(false)
+ { }
+
unsigned int iRaysPerDetDim;
unsigned int iRaysPerVoxelDim;
+ float fOutputScale;
+ float fVolScaleX;
+ float fVolScaleY;
+ float fVolScaleZ;
+ Cuda3DProjectionKernel ker;
+ bool bFDKWeighting;
};
}
diff --git a/cuda/3d/fdk.cu b/cuda/3d/fdk.cu
index 4899ad1..bbac0be 100644
--- a/cuda/3d/fdk.cu
+++ b/cuda/3d/fdk.cu
@@ -41,174 +41,26 @@ $Id$
#include "dims3d.h"
#include "arith3d.h"
+#include "cone_bp.h"
#include "../2d/fft.h"
-typedef texture<float, 3, cudaReadModeElementType> texture3D;
-
-static texture3D gT_coneProjTexture;
+#include "../../include/astra/Logging.h"
namespace astraCUDA3d {
-static const unsigned int g_volBlockZ = 16;
-
-static const unsigned int g_anglesPerBlock = 64;
-static const unsigned int g_volBlockX = 32;
-static const unsigned int g_volBlockY = 16;
-
static const unsigned int g_anglesPerWeightBlock = 16;
static const unsigned int g_detBlockU = 32;
static const unsigned int g_detBlockV = 32;
-static const unsigned g_MaxAngles = 2048;
+static const unsigned g_MaxAngles = 12000;
-__constant__ float gC_angle_sin[g_MaxAngles];
-__constant__ float gC_angle_cos[g_MaxAngles];
__constant__ float gC_angle[g_MaxAngles];
// per-detector u/v shifts?
-static bool bindProjDataTexture(const cudaArray* array)
-{
- cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-
- gT_coneProjTexture.addressMode[0] = cudaAddressModeBorder;
- gT_coneProjTexture.addressMode[1] = cudaAddressModeBorder;
- gT_coneProjTexture.addressMode[2] = cudaAddressModeBorder;
- gT_coneProjTexture.filterMode = cudaFilterModeLinear;
- gT_coneProjTexture.normalized = false;
-
- cudaBindTextureToArray(gT_coneProjTexture, array, channelDesc);
- // TODO: error value?
-
- return true;
-}
-
-
-__global__ void devBP_FDK(void* D_volData, unsigned int volPitch, int startAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fInvDetUSize, float fInvDetVSize, const SDimensions3D dims)
-{
- float* volData = (float*)D_volData;
-
- int endAngle = startAngle + g_anglesPerBlock;
- if (endAngle > dims.iProjAngles)
- endAngle = dims.iProjAngles;
-
- // threadIdx: x = rel x
- // y = rel y
-
- // blockIdx: x = x + y
- // y = z
-
-
- // TO TRY: precompute part of detector intersection formulas in shared mem?
- // TO TRY: inner loop over z, gather ray values in shared mem
-
- const int X = blockIdx.x % ((dims.iVolX+g_volBlockX-1)/g_volBlockX) * g_volBlockX + threadIdx.x;
- const int Y = blockIdx.x / ((dims.iVolX+g_volBlockX-1)/g_volBlockX) * g_volBlockY + threadIdx.y;
-
- if (X > dims.iVolX)
- return;
- if (Y > dims.iVolY)
- return;
-
- const int startZ = blockIdx.y * g_volBlockZ;
- int endZ = startZ + g_volBlockZ;
- if (endZ > dims.iVolZ)
- endZ = dims.iVolZ;
-
- float fX = X - 0.5f*dims.iVolX + 0.5f;
- float fY = Y - 0.5f*dims.iVolY + 0.5f;
- float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - fSrcZ;
-
- const float fU_base = 0.5f*dims.iProjU - 0.5f + 0.5f;
- const float fV_base = 0.5f*dims.iProjV - 0.5f + 0.5f + (fDetZ-fSrcZ);
-
- // Note re. fZ/rV_base: the computations below are all relative to the
- // optical axis, so we do the Z-adjustments beforehand.
-
- for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f)
- {
-
- float fVal = 0.0f;
- float fAngle = startAngle + 0.5f;
-
- for (int angle = startAngle; angle < endAngle; ++angle, fAngle += 1.0f)
- {
-
- const float cos_theta = gC_angle_cos[angle];
- const float sin_theta = gC_angle_sin[angle];
-
- const float fR = fSrcOrigin;
- const float fD = fR - fX * sin_theta + fY * cos_theta;
- float fWeight = fR / fD;
- fWeight *= fWeight;
-
- const float fScaleFactor = (fR + fDetOrigin) / fD;
- const float fU = fU_base + (fX*cos_theta+fY*sin_theta) * fScaleFactor * fInvDetUSize;
- const float fV = fV_base + fZ * fScaleFactor * fInvDetVSize;
-
- fVal += tex3D(gT_coneProjTexture, fU, fAngle, fV);
-
- }
-
- volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal;
-// projData[(angle*dims.iProjV+detectorV)*projPitch+detectorU] = 10.0f;
-// if (threadIdx.x == 0 && threadIdx.y == 0) { printf("%d,%d,%d [%d / %d] -> %f\n", angle, detectorU, detectorV, (angle*dims.iProjV+detectorV)*projPitch+detectorU, projPitch, projData[(angle*dims.iProjV+detectorV)*projPitch+detectorU]); }
- }
-
-}
-
-
-bool FDK_BP(cudaPitchedPtr D_volumeData,
- cudaPitchedPtr D_projData,
- float fSrcOrigin, float fDetOrigin,
- float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize,
- const SDimensions3D& dims, const float* angles)
-{
- // transfer projections to array
-
- cudaArray* cuArray = allocateProjectionArray(dims);
- transferProjectionsToArray(D_projData, cuArray, dims);
-
- bindProjDataTexture(cuArray);
-
- float* angle_sin = new float[dims.iProjAngles];
- float* angle_cos = new float[dims.iProjAngles];
-
- for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
- angle_sin[i] = sinf(angles[i]);
- angle_cos[i] = cosf(angles[i]);
- }
- cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_sin, angle_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
- cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_cos, angle_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
- assert(e1 == cudaSuccess);
- assert(e2 == cudaSuccess);
-
- delete[] angle_sin;
- delete[] angle_cos;
-
- dim3 dimBlock(g_volBlockX, g_volBlockY);
-
- dim3 dimGrid(((dims.iVolX+g_volBlockX-1)/g_volBlockX)*((dims.iVolY+g_volBlockY-1)/g_volBlockY), (dims.iVolZ+g_volBlockZ-1)/g_volBlockZ);
-
- // timeval t;
- // tic(t);
-
- for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
- devBP_FDK<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, 1.0f / fDetUSize, 1.0f / fDetVSize, dims);
- }
-
- cudaTextForceKernelsCompletion();
-
- cudaFreeArray(cuArray);
-
- // printf("%f\n", toc(t));
-
- return true;
-}
-
-__global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, const SDimensions3D dims)
+__global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fZShift, float fDetUSize, float fDetVSize, const SDimensions3D dims)
{
float* projData = (float*)D_projData;
int angle = startAngle + blockIdx.y * g_anglesPerWeightBlock + threadIdx.y;
@@ -230,21 +82,26 @@ __global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsig
const float fT = fCentralRayLength * fCentralRayLength + fU * fU;
- float fV = (startDetectorV - 0.5f*dims.iProjV + 0.5f) * fDetVSize + fDetZ - fSrcZ;
+ float fV = (startDetectorV - 0.5f*dims.iProjV + 0.5f) * fDetVSize + fZShift;
+
+ //const float fW = fCentralRayLength;
+ //const float fW = fCentralRayLength * (M_PI / 2.0f) / (float)dims.iProjAngles;
+ const float fW1 = fSrcOrigin * fDetUSize * fDetVSize;
+ const float fW = fCentralRayLength * fW1 * fW1 * (M_PI / 2.0f) / (float)dims.iProjAngles;
for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV)
{
const float fRayLength = sqrtf(fT + fV * fV);
- const float fWeight = fCentralRayLength / fRayLength;
+ const float fWeight = fW / fRayLength;
projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] *= fWeight;
- fV += 1.0f;
+ fV += fDetVSize;
}
}
-__global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fDetUSize, float fCentralFanAngle, const SDimensions3D dims)
+__global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fDetUSize, float fCentralFanAngle, const SDimensions3D dims)
{
float* projData = (float*)D_projData;
int angle = startAngle + blockIdx.y * g_anglesPerWeightBlock + threadIdx.y;
@@ -305,7 +162,7 @@ __global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, un
// Perform the FDK pre-weighting and filtering
bool FDK_PreWeight(cudaPitchedPtr D_projData,
float fSrcOrigin, float fDetOrigin,
- float fSrcZ, float fDetZ,
+ float fZShift,
float fDetUSize, float fDetVSize, bool bShortScan,
const SDimensions3D& dims, const float* angles)
{
@@ -318,23 +175,57 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData,
int projPitch = D_projData.pitch/sizeof(float);
- devFDK_preweight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fDetVSize, dims);
+ devFDK_preweight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fZShift, fDetUSize, fDetVSize, dims);
cudaTextForceKernelsCompletion();
- if (bShortScan) {
+ if (bShortScan && dims.iProjAngles > 1) {
+ ASTRA_DEBUG("Doing Parker weighting");
// We do short-scan Parker weighting
- cudaError_t e1 = cudaMemcpyToSymbol(gC_angle, angles,
+ // First, determine (in a very basic way) the interval that's
+ // been scanned. We assume angles[0] is one of the endpoints of the
+ // range.
+ float fdA = angles[1] - angles[0];
+
+ while (fdA < -M_PI)
+ fdA += 2*M_PI;
+ while (fdA >= M_PI)
+ fdA -= 2*M_PI;
+
+ float fAngleBase;
+ if (fdA >= 0.0f) {
+ // going up from angles[0]
+ fAngleBase = angles[dims.iProjAngles - 1];
+ } else {
+ // going down from angles[0]
+ fAngleBase = angles[0];
+ }
+
+ // We pick the highest end of the range, and then
+ // move all angles so they fall in (-2pi,0]
+
+ float *fRelAngles = new float[dims.iProjAngles];
+ for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
+ float f = angles[i] - fAngleBase;
+ while (f > 0)
+ f -= 2*M_PI;
+ while (f <= -2*M_PI)
+ f += 2*M_PI;
+ fRelAngles[i] = f;
+
+ }
+
+ cudaError_t e1 = cudaMemcpyToSymbol(gC_angle, fRelAngles,
dims.iProjAngles*sizeof(float), 0,
cudaMemcpyHostToDevice);
assert(!e1);
+ delete[] fRelAngles;
- // TODO: detector pixel size!
- float fCentralFanAngle = atanf((dims.iProjU*0.5f) /
+ float fCentralFanAngle = atanf(fDetUSize * (dims.iProjU*0.5f) /
(fSrcOrigin + fDetOrigin));
- devFDK_ParkerWeight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fCentralFanAngle, dims);
+ devFDK_ParkerWeight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fDetUSize, fCentralFanAngle, dims);
}
@@ -344,10 +235,7 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData,
bool FDK_Filter(cudaPitchedPtr D_projData,
cufftComplex * D_filter,
- float fSrcOrigin, float fDetOrigin,
- float fSrcZ, float fDetZ,
- float fDetUSize, float fDetVSize, bool bShortScan,
- const SDimensions3D& dims, const float* angles)
+ const SDimensions3D& dims)
{
// The filtering is a regular ramp filter per detector line.
@@ -392,10 +280,9 @@ bool FDK_Filter(cudaPitchedPtr D_projData,
bool FDK(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
- float fSrcOrigin, float fDetOrigin,
- float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize,
- const SDimensions3D& dims, const float* angles, bool bShortScan,
- const float* filter)
+ const SConeProjection* angles,
+ const SDimensions3D& dims, SProjectorParams3D params, bool bShortScan,
+ const float* pfFilter)
{
bool ok;
// Generate filter
@@ -404,20 +291,53 @@ bool FDK(cudaPitchedPtr D_volumeData,
int iPaddedDetCount = calcNextPowerOfTwo(2 * dims.iProjU);
int iHalfFFTSize = calcFFTFourSize(iPaddedDetCount);
+
+ // NB: We don't support arbitrary cone_vec geometries here.
+ // Only those that are vertical sub-geometries
+ // (cf. CompositeGeometryManager) of regular cone geometries.
+ assert(dims.iProjAngles > 0);
+ const SConeProjection& p0 = angles[0];
+
+ // assuming U is in the XY plane, V is parallel to Z axis
+ float fDetCX = p0.fDetSX + 0.5*dims.iProjU*p0.fDetUX;
+ float fDetCY = p0.fDetSY + 0.5*dims.iProjU*p0.fDetUY;
+ float fDetCZ = p0.fDetSZ + 0.5*dims.iProjV*p0.fDetVZ;
+
+ float fSrcOrigin = sqrt(p0.fSrcX*p0.fSrcX + p0.fSrcY*p0.fSrcY);
+ float fDetOrigin = sqrt(fDetCX*fDetCX + fDetCY*fDetCY);
+ float fDetUSize = sqrt(p0.fDetUX*p0.fDetUX + p0.fDetUY*p0.fDetUY);
+ float fDetVSize = abs(p0.fDetVZ);
+
+ float fZShift = fDetCZ - p0.fSrcZ;
+
+ float *pfAngles = new float[dims.iProjAngles];
+ for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
+ // FIXME: Sign/order
+ pfAngles[i] = -atan2(angles[i].fSrcX, angles[i].fSrcY) + M_PI;
+ }
+
+
+#if 1
ok = FDK_PreWeight(D_projData, fSrcOrigin, fDetOrigin,
- fSrcZ, fDetZ, fDetUSize, fDetVSize,
- bShortScan, dims, angles);
+ fZShift, fDetUSize, fDetVSize,
+ bShortScan, dims, pfAngles);
+#else
+ ok = true;
+#endif
+ delete[] pfAngles;
+
if (!ok)
return false;
+#if 1
cufftComplex *pHostFilter = new cufftComplex[dims.iProjAngles * iHalfFFTSize];
memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iHalfFFTSize);
- if (filter==NULL){
+ if (pfFilter == 0){
genFilter(FILTER_RAMLAK, 1.0f, dims.iProjAngles, pHostFilter, iPaddedDetCount, iHalfFFTSize);
- }else{
- for(int i=0;i<dims.iProjAngles * iHalfFFTSize;i++){
- pHostFilter[i].x = filter[i];
+ } else {
+ for (int i = 0; i < dims.iProjAngles * iHalfFFTSize; i++) {
+ pHostFilter[i].x = pfFilter[i];
pHostFilter[i].y = 0;
}
}
@@ -433,28 +353,25 @@ bool FDK(cudaPitchedPtr D_volumeData,
- ok = FDK_Filter(D_projData, D_filter, fSrcOrigin, fDetOrigin,
- fSrcZ, fDetZ, fDetUSize, fDetVSize,
- bShortScan, dims, angles);
+ ok = FDK_Filter(D_projData, D_filter, dims);
// Clean up filter
freeComplexOnDevice(D_filter);
-
+#endif
if (!ok)
return false;
// Perform BP
- ok = FDK_BP(D_volumeData, D_projData, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ,
- fDetUSize, fDetVSize, dims, angles);
+ params.bFDKWeighting = true;
+
+ //ok = FDK_BP(D_volumeData, D_projData, fSrcOrigin, fDetOrigin, 0.0f, 0.0f, fDetUSize, fDetVSize, dims, pfAngles);
+ ok = ConeBP(D_volumeData, D_projData, dims, angles, params);
if (!ok)
return false;
- processVol3D<opMul>(D_volumeData,
- (M_PI / 2.0f) / (float)dims.iProjAngles, dims);
-
return true;
}
diff --git a/cuda/3d/fdk.h b/cuda/3d/fdk.h
index 7a9d318..fc0df20 100644
--- a/cuda/3d/fdk.h
+++ b/cuda/3d/fdk.h
@@ -35,18 +35,16 @@ namespace astraCUDA3d {
bool FDK_PreWeight(cudaPitchedPtr D_projData,
float fSrcOrigin, float fDetOrigin,
- float fSrcZ, float fDetZ,
+ float fZShift,
float fDetUSize, float fDetVSize, bool bShortScan,
const SDimensions3D& dims, const float* angles);
bool FDK(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
- float fSrcOrigin, float fDetOrigin,
- float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize,
- const SDimensions3D& dims, const float* angles, bool bShortScan,
+ const SConeProjection* angles,
+ const SDimensions3D& dims, SProjectorParams3D params, bool bShortScan,
const float* filter);
-
}
#endif
diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu
index 0320117..cb15ab9 100644
--- a/cuda/3d/mem3d.cu
+++ b/cuda/3d/mem3d.cu
@@ -38,6 +38,7 @@ $Id$
#include "cone_bp.h"
#include "par3d_fp.h"
#include "par3d_bp.h"
+#include "fdk.h"
#include "astra/Logging.h"
@@ -193,17 +194,17 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos)
bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel)
{
SDimensions3D dims;
+ SProjectorParams3D params;
bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
if (!ok)
return false;
#if 1
- dims.iRaysPerDetDim = iDetectorSuperSampling;
+ params.iRaysPerDetDim = iDetectorSuperSampling;
if (iDetectorSuperSampling == 0)
return false;
#else
- dims.iRaysPerDetDim = 1;
astra::Cuda3DProjectionKernel projKernel = astra::ker3d_default;
#endif
@@ -211,11 +212,9 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
SPar3DProjection* pParProjs;
SConeProjection* pConeProjs;
- float outputScale = 1.0f;
-
ok = convertAstraGeometry(pVolGeom, pProjGeom,
pParProjs, pConeProjs,
- outputScale);
+ params);
if (pParProjs) {
#if 0
@@ -230,10 +229,10 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
switch (projKernel) {
case astra::ker3d_default:
- ok &= Par3DFP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale);
+ ok &= Par3DFP(volData.d->ptr, projData.d->ptr, dims, pParProjs, params);
break;
case astra::ker3d_sum_square_weights:
- ok &= Par3DFP_SumSqW(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale*outputScale);
+ ok &= Par3DFP_SumSqW(volData.d->ptr, projData.d->ptr, dims, pParProjs, params);
break;
default:
ok = false;
@@ -241,7 +240,7 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
} else {
switch (projKernel) {
case astra::ker3d_default:
- ok &= ConeFP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale);
+ ok &= ConeFP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, params);
break;
default:
ok = false;
@@ -254,35 +253,59 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling)
{
SDimensions3D dims;
+ SProjectorParams3D params;
bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
if (!ok)
return false;
#if 1
- dims.iRaysPerVoxelDim = iVoxelSuperSampling;
-#else
- dims.iRaysPerVoxelDim = 1;
+ params.iRaysPerVoxelDim = iVoxelSuperSampling;
#endif
SPar3DProjection* pParProjs;
SConeProjection* pConeProjs;
- float outputScale = 1.0f;
-
ok = convertAstraGeometry(pVolGeom, pProjGeom,
pParProjs, pConeProjs,
- outputScale);
+ params);
if (pParProjs)
- ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale);
+ ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, params);
else
- ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale);
+ ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, params);
return ok;
}
+bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan, const float *pfFilter)
+{
+ SDimensions3D dims;
+ SProjectorParams3D params;
+
+ bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
+ if (!ok)
+ return false;
+
+ SPar3DProjection* pParProjs;
+ SConeProjection* pConeProjs;
+
+ ok = convertAstraGeometry(pVolGeom, pProjGeom,
+ pParProjs, pConeProjs,
+ params);
+
+ if (!ok || !pConeProjs)
+ return false;
+
+ ok &= FDK(volData.d->ptr, projData.d->ptr, pConeProjs, dims, params, bShortScan, pfFilter);
+
+ return ok;
+
+
+
+}
+
diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h
index 6fff80b..bb8b091 100644
--- a/cuda/3d/mem3d.h
+++ b/cuda/3d/mem3d.h
@@ -95,6 +95,7 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling);
+bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan, const float *pfFilter = 0);
}
diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu
index cafab46..491ee2f 100644
--- a/cuda/3d/par3d_bp.cu
+++ b/cuda/3d/par3d_bp.cu
@@ -143,7 +143,7 @@ __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAn
}
// supersampling version
-__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)
+__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, int iRaysPerVoxelDim, float fOutputScale)
{
float* volData = (float*)D_volData;
@@ -174,13 +174,13 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star
if (endZ > dims.iVolZ)
endZ = dims.iVolZ;
- float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
- float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
- float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
+ float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim;
+ float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim;
+ float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim;
- const float fSubStep = 1.0f/dims.iRaysPerVoxelDim;
+ const float fSubStep = 1.0f/iRaysPerVoxelDim;
- fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+ fOutputScale /= (iRaysPerVoxelDim*iRaysPerVoxelDim*iRaysPerVoxelDim);
for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f)
@@ -201,11 +201,11 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star
const float fCvc = gC_C[8*angle+7];
float fXs = fX;
- for (int iSubX = 0; iSubX < dims.iRaysPerVoxelDim; ++iSubX) {
+ for (int iSubX = 0; iSubX < iRaysPerVoxelDim; ++iSubX) {
float fYs = fY;
- for (int iSubY = 0; iSubY < dims.iRaysPerVoxelDim; ++iSubY) {
+ for (int iSubY = 0; iSubY < iRaysPerVoxelDim; ++iSubY) {
float fZs = fZ;
- for (int iSubZ = 0; iSubZ < dims.iRaysPerVoxelDim; ++iSubZ) {
+ for (int iSubZ = 0; iSubZ < iRaysPerVoxelDim; ++iSubZ) {
const float fU = fCuc + fXs * fCux + fYs * fCuy + fZs * fCuz;
const float fV = fCvc + fXs * fCvx + fYs * fCvy + fZs * fCvz;
@@ -228,10 +228,12 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star
bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
bindProjDataTexture(D_projArray);
+ float fOutputScale = params.fOutputScale * params.fVolScaleX * params.fVolScaleY * params.fVolScaleZ;
+
for (unsigned int th = 0; th < dims.iProjAngles; th += g_MaxAngles) {
unsigned int angleCount = g_MaxAngles;
if (th + angleCount > dims.iProjAngles)
@@ -274,10 +276,10 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {
// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);
- if (dims.iRaysPerVoxelDim == 1)
+ if (params.iRaysPerVoxelDim == 1)
dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
else
- dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale);
}
cudaTextForceKernelsCompletion();
@@ -293,14 +295,14 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
bool Par3DBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
// transfer projections to array
cudaArray* cuArray = allocateProjectionArray(dims);
transferProjectionsToArray(D_projData, cuArray, dims);
- bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale);
+ bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles, params);
cudaFreeArray(cuArray);
diff --git a/cuda/3d/par3d_bp.h b/cuda/3d/par3d_bp.h
index f1fc62d..3efad19 100644
--- a/cuda/3d/par3d_bp.h
+++ b/cuda/3d/par3d_bp.h
@@ -34,12 +34,12 @@ namespace astraCUDA3d {
_AstraExport bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
_AstraExport bool Par3DBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
}
diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu
index 3ce3d42..6f9ce40 100644
--- a/cuda/3d/par3d_fp.cu
+++ b/cuda/3d/par3d_fp.cu
@@ -128,6 +128,18 @@ struct DIR_Z {
__device__ float z(float f0, float f1, float f2) const { return f0; }
};
+struct SCALE_CUBE {
+ float fOutputScale;
+ __device__ float scale(float a1, float a2) const { return sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; }
+};
+
+struct SCALE_NONCUBE {
+ float fScale1;
+ float fScale2;
+ float fOutputScale;
+ __device__ float scale(float a1, float a2) const { return sqrt(a1*a1*fScale1+a2*a2*fScale2+1.0f) * fOutputScale; }
+};
+
// threadIdx: x = u detector
@@ -136,11 +148,12 @@ struct DIR_Z {
// y = angle block
-template<class COORD>
+template<class COORD, class SCALE>
__global__ void par3D_FP_t(float* D_projData, unsigned int projPitch,
unsigned int startSlice,
unsigned int startAngle, unsigned int endAngle,
- const SDimensions3D dims, float fOutputScale)
+ const SDimensions3D dims,
+ SCALE sc)
{
COORD c;
@@ -161,6 +174,9 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch,
const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY;
const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ;
+ const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
+ const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
+ const float fDistCorr = sc.scale(a1, a2);
const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;
@@ -186,13 +202,9 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch,
/* ray: (y) = (ay) * x + (by) */
/* (z) (az) (bz) */
- const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
- const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ);
const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ);
- const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale;
-
float fVal = 0.0f;
float f0 = startSlice + 0.5f;
@@ -218,7 +230,8 @@ template<class COORD>
__global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,
unsigned int startSlice,
unsigned int startAngle, unsigned int endAngle,
- const SDimensions3D dims, float fOutputScale)
+ const SDimensions3D dims, int iRaysPerDetDim,
+ SCALE_NONCUBE sc)
{
COORD c;
@@ -239,7 +252,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,
const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY;
const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ;
-
+ const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
+ const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
+ const float fDistCorr = sc.scale(a1, a2);
const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;
const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV;
@@ -251,7 +266,7 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,
if (endSlice > c.nSlices(dims))
endSlice = c.nSlices(dims);
- const float fSubStep = 1.0f/dims.iRaysPerDetDim;
+ const float fSubStep = 1.0f/iRaysPerDetDim;
for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV)
{
@@ -259,9 +274,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,
float fV = 0.0f;
float fdU = detectorU - 0.5f + 0.5f*fSubStep;
- for (int iSubU = 0; iSubU < dims.iRaysPerDetDim; ++iSubU, fdU+=fSubStep) {
+ for (int iSubU = 0; iSubU < iRaysPerDetDim; ++iSubU, fdU+=fSubStep) {
float fdV = detectorV - 0.5f + 0.5f*fSubStep;
- for (int iSubV = 0; iSubV < dims.iRaysPerDetDim; ++iSubV, fdV+=fSubStep) {
+ for (int iSubV = 0; iSubV < iRaysPerDetDim; ++iSubV, fdV+=fSubStep) {
/* Trace ray in direction Ray to (detectorU,detectorV) from */
/* X = startSlice to X = endSlice */
@@ -274,12 +289,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,
/* ray: (y) = (ay) * x + (by) */
/* (z) (az) (bz) */
- const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
- const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ);
const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ);
- const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale;
float fVal = 0.0f;
@@ -295,13 +307,13 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,
f2 += a2;
}
- fVal *= fDistCorr;
fV += fVal;
}
}
- D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (dims.iRaysPerDetDim * dims.iRaysPerDetDim);
+ fV *= fDistCorr;
+ D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (iRaysPerDetDim * iRaysPerDetDim);
}
}
@@ -324,7 +336,8 @@ template<class COORD>
__global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch,
unsigned int startSlice,
unsigned int startAngle, unsigned int endAngle,
- const SDimensions3D dims, float fOutputScale)
+ const SDimensions3D dims,
+ SCALE_NONCUBE sc)
{
COORD c;
@@ -345,6 +358,9 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch,
const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY;
const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ;
+ const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
+ const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
+ const float fDistCorr = sc.scale(a1, a2);
const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;
@@ -370,13 +386,9 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch,
/* ray: (y) = (ay) * x + (by) */
/* (z) (az) (bz) */
- const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
- const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ);
const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ);
- const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale;
-
float fVal = 0.0f;
float f0 = startSlice + 0.5f;
@@ -385,12 +397,13 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch,
for (int s = startSlice; s < endSlice; ++s)
{
- fVal += dirWeights(f1, c.nDim1(dims)) * dirWeights(f2, c.nDim2(dims)) * fDistCorr * fDistCorr;
+ fVal += dirWeights(f1, c.nDim1(dims)) * dirWeights(f2, c.nDim2(dims));
f0 += 1.0f;
f1 += a1;
f2 += a2;
}
+ fVal *= fDistCorr * fDistCorr;
D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fVal;
}
}
@@ -401,7 +414,7 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch,
bool Par3DFP_Array_internal(cudaPitchedPtr D_projData,
const SDimensions3D& dims, unsigned int angleCount, const SPar3DProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
// transfer angles to constant memory
float* tmp = new float[dims.iProjAngles];
@@ -436,6 +449,36 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData,
unsigned int blockEnd = 0;
int blockDirection = 0;
+ bool cube = true;
+ if (abs(params.fVolScaleX / params.fVolScaleY - 1.0) > 0.00001)
+ cube = false;
+ if (abs(params.fVolScaleX / params.fVolScaleZ - 1.0) > 0.00001)
+ cube = false;
+
+ SCALE_CUBE scube;
+ scube.fOutputScale = params.fOutputScale * params.fVolScaleX;
+
+ SCALE_NONCUBE snoncubeX;
+ float fS1 = params.fVolScaleY / params.fVolScaleX;
+ snoncubeX.fScale1 = fS1 * fS1;
+ float fS2 = params.fVolScaleZ / params.fVolScaleX;
+ snoncubeX.fScale2 = fS2 * fS2;
+ snoncubeX.fOutputScale = params.fOutputScale * params.fVolScaleX;
+
+ SCALE_NONCUBE snoncubeY;
+ fS1 = params.fVolScaleX / params.fVolScaleY;
+ snoncubeY.fScale1 = fS1 * fS1;
+ fS2 = params.fVolScaleY / params.fVolScaleY;
+ snoncubeY.fScale2 = fS2 * fS2;
+ snoncubeY.fOutputScale = params.fOutputScale * params.fVolScaleY;
+
+ SCALE_NONCUBE snoncubeZ;
+ fS1 = params.fVolScaleX / params.fVolScaleZ;
+ snoncubeZ.fScale1 = fS1 * fS1;
+ fS2 = params.fVolScaleY / params.fVolScaleZ;
+ snoncubeZ.fScale2 = fS2 * fS2;
+ snoncubeZ.fOutputScale = params.fOutputScale * params.fVolScaleZ;
+
// timeval t;
// tic(t);
@@ -473,22 +516,31 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData,
if (blockDirection == 0) {
for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- par3D_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ if (cube)
+ par3D_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube);
+ else
+ par3D_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeX);
else
- par3D_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ par3D_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeX);
} else if (blockDirection == 1) {
for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- par3D_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ if (cube)
+ par3D_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube);
+ else
+ par3D_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeY);
else
- par3D_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ par3D_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeY);
} else if (blockDirection == 2) {
for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- par3D_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ if (cube)
+ par3D_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube);
+ else
+ par3D_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeZ);
else
- par3D_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ par3D_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeZ);
}
}
@@ -514,7 +566,7 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData,
bool Par3DFP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
// transfer volume to array
cudaArray* cuArray = allocateVolumeArray(dims);
@@ -533,7 +585,7 @@ bool Par3DFP(cudaPitchedPtr D_volumeData,
ret = Par3DFP_Array_internal(D_subprojData,
dims, iEndAngle - iAngle, angles + iAngle,
- fOutputScale);
+ params);
if (!ret)
break;
}
@@ -548,7 +600,7 @@ bool Par3DFP(cudaPitchedPtr D_volumeData,
bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
// transfer angles to constant memory
float* tmp = new float[dims.iProjAngles];
@@ -583,6 +635,28 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,
unsigned int blockEnd = 0;
int blockDirection = 0;
+ SCALE_NONCUBE snoncubeX;
+ float fS1 = params.fVolScaleY / params.fVolScaleX;
+ snoncubeX.fScale1 = fS1 * fS1;
+ float fS2 = params.fVolScaleZ / params.fVolScaleX;
+ snoncubeX.fScale2 = fS2 * fS2;
+ snoncubeX.fOutputScale = params.fOutputScale * params.fVolScaleX;
+
+ SCALE_NONCUBE snoncubeY;
+ fS1 = params.fVolScaleX / params.fVolScaleY;
+ snoncubeY.fScale1 = fS1 * fS1;
+ fS2 = params.fVolScaleY / params.fVolScaleY;
+ snoncubeY.fScale2 = fS2 * fS2;
+ snoncubeY.fOutputScale = params.fOutputScale * params.fVolScaleY;
+
+ SCALE_NONCUBE snoncubeZ;
+ fS1 = params.fVolScaleX / params.fVolScaleZ;
+ snoncubeZ.fScale1 = fS1 * fS1;
+ fS2 = params.fVolScaleY / params.fVolScaleZ;
+ snoncubeZ.fScale2 = fS2 * fS2;
+ snoncubeZ.fOutputScale = params.fOutputScale * params.fVolScaleZ;
+
+
// timeval t;
// tic(t);
@@ -620,8 +694,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,
if (blockDirection == 0) {
for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- par3D_FP_SumSqW_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ par3D_FP_SumSqW_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeX);
else
#if 0
par3D_FP_SS_SumSqW_dirX<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
@@ -630,8 +704,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,
#endif
} else if (blockDirection == 1) {
for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- par3D_FP_SumSqW_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ par3D_FP_SumSqW_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeY);
else
#if 0
par3D_FP_SS_SumSqW_dirY<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
@@ -640,8 +714,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,
#endif
} else if (blockDirection == 2) {
for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- par3D_FP_SumSqW_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ par3D_FP_SumSqW_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeZ);
else
#if 0
par3D_FP_SS_SumSqW_dirZ<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
diff --git a/cuda/3d/par3d_fp.h b/cuda/3d/par3d_fp.h
index 41f204f..5f65cd9 100644
--- a/cuda/3d/par3d_fp.h
+++ b/cuda/3d/par3d_fp.h
@@ -34,17 +34,17 @@ namespace astraCUDA3d {
_AstraExport bool Par3DFP_Array(cudaArray *D_volArray,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
_AstraExport bool Par3DFP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
_AstraExport bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
}
diff --git a/cuda/3d/sirt3d.cu b/cuda/3d/sirt3d.cu
index 713944b..ba6a159 100644
--- a/cuda/3d/sirt3d.cu
+++ b/cuda/3d/sirt3d.cu
@@ -353,7 +353,7 @@ bool doSIRT(cudaPitchedPtr& D_volumeData,
SIRT sirt;
bool ok = true;
- ok &= sirt.setConeGeometry(dims, angles, 1.0f);
+ ok &= sirt.setConeGeometry(dims, angles, SProjectorParams3D());
if (D_maskData.ptr)
ok &= sirt.enableVolumeMask();
diff --git a/include/astra/CompositeGeometryManager.h b/include/astra/CompositeGeometryManager.h
index 18dd72f..d30ffd1 100644
--- a/include/astra/CompositeGeometryManager.h
+++ b/include/astra/CompositeGeometryManager.h
@@ -55,6 +55,11 @@ struct SGPUParams {
size_t memory;
};
+struct SFDKSettings {
+ bool bShortScan;
+ const float *pfFilter;
+};
+
class _AstraExport CCompositeGeometryManager {
public:
@@ -127,9 +132,10 @@ public:
CProjector3D *pProjector; // For a `global' geometry. It will not match
// the geometries of the input and output.
+ SFDKSettings FDKSettings;
enum {
- JOB_FP, JOB_BP, JOB_NOP
+ JOB_FP, JOB_BP, JOB_FDK, JOB_NOP
} eType;
enum {
MODE_ADD, MODE_SET
@@ -155,6 +161,9 @@ public:
CFloat32ProjectionData3DMemory *pProjData);
bool doBP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
CFloat32ProjectionData3DMemory *pProjData);
+ bool doFDK(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
+ CFloat32ProjectionData3DMemory *pProjData, bool bShortScan,
+ const float *pfFilter = 0);
bool doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData);
bool doBP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData);
diff --git a/include/astra/GeometryUtil3D.h b/include/astra/GeometryUtil3D.h
index e4d73e4..e051240 100644
--- a/include/astra/GeometryUtil3D.h
+++ b/include/astra/GeometryUtil3D.h
@@ -56,19 +56,19 @@ struct SConeProjection {
fDetSZ += dz;
}
- void scale(double factor) {
- fSrcX *= factor;
- fSrcY *= factor;
- fSrcZ *= factor;
- fDetSX *= factor;
- fDetSY *= factor;
- fDetSZ *= factor;
- fDetUX *= factor;
- fDetUY *= factor;
- fDetUZ *= factor;
- fDetVX *= factor;
- fDetVY *= factor;
- fDetVZ *= factor;
+ void scale(double fx, double fy, double fz) {
+ fSrcX *= fx;
+ fSrcY *= fy;
+ fSrcZ *= fz;
+ fDetSX *= fx;
+ fDetSY *= fy;
+ fDetSZ *= fz;
+ fDetUX *= fx;
+ fDetUY *= fy;
+ fDetUZ *= fz;
+ fDetVX *= fx;
+ fDetVY *= fy;
+ fDetVZ *= fz;
}
};
@@ -93,19 +93,19 @@ struct SPar3DProjection {
fDetSY += dy;
fDetSZ += dz;
}
- void scale(double factor) {
- fRayX *= factor;
- fRayY *= factor;
- fRayZ *= factor;
- fDetSX *= factor;
- fDetSY *= factor;
- fDetSZ *= factor;
- fDetUX *= factor;
- fDetUY *= factor;
- fDetUZ *= factor;
- fDetVX *= factor;
- fDetVY *= factor;
- fDetVZ *= factor;
+ void scale(double fx, double fy, double fz) {
+ fRayX *= fx;
+ fRayY *= fy;
+ fRayZ *= fz;
+ fDetSX *= fx;
+ fDetSY *= fy;
+ fDetSZ *= fz;
+ fDetUX *= fx;
+ fDetUY *= fy;
+ fDetUZ *= fz;
+ fDetVX *= fx;
+ fDetVY *= fy;
+ fDetVZ *= fz;
}
};
diff --git a/include/astra/Singleton.h b/include/astra/Singleton.h
index 784c521..9d3c088 100644
--- a/include/astra/Singleton.h
+++ b/include/astra/Singleton.h
@@ -45,11 +45,7 @@ class Singleton {
public:
// constructor
- Singleton() {
- assert(!m_singleton);
- int offset = (uintptr_t)(T*)1 - (uintptr_t)(Singleton<T>*)(T*)1;
- m_singleton = (T*)((uintptr_t)this + offset);
- };
+ Singleton() { }
// destructor
virtual ~Singleton() {
diff --git a/include/astra/Utilities.h b/include/astra/Utilities.h
index 8d7c44d..22adfe2 100644
--- a/include/astra/Utilities.h
+++ b/include/astra/Utilities.h
@@ -85,6 +85,9 @@ _AstraExport std::string doubleToString(double f);
template<typename T>
_AstraExport std::string toString(T f);
+
+_AstraExport void splitString(std::vector<std::string> &items, const std::string& s, const char *delim);
+
}
diff --git a/matlab/mex/mexHelpFunctions.cpp b/matlab/mex/mexHelpFunctions.cpp
index 13c4ade..d957aea 100644
--- a/matlab/mex/mexHelpFunctions.cpp
+++ b/matlab/mex/mexHelpFunctions.cpp
@@ -33,11 +33,6 @@ $Id$
#include "mexHelpFunctions.h"
#include "astra/Utilities.h"
-#include <algorithm>
-#include <boost/algorithm/string.hpp>
-#include <boost/algorithm/string/split.hpp>
-#include <boost/algorithm/string/classification.hpp>
-
using namespace std;
using namespace astra;
@@ -362,8 +357,8 @@ mxArray* stringToMxArray(std::string input)
// split rows
std::vector<std::string> row_strings;
std::vector<std::string> col_strings;
- boost::split(row_strings, input, boost::is_any_of(";"));
- boost::split(col_strings, row_strings[0], boost::is_any_of(","));
+ StringUtil::splitString(row_strings, input, ";");
+ StringUtil::splitString(col_strings, row_strings[0], ",");
// get dimensions
int rows = row_strings.size();
@@ -375,7 +370,7 @@ mxArray* stringToMxArray(std::string input)
// loop elements
for (unsigned int row = 0; row < rows; row++) {
- boost::split(col_strings, row_strings[row], boost::is_any_of(","));
+ StringUtil::splitString(col_strings, row_strings[row], ",");
// check size
for (unsigned int col = 0; col < col_strings.size(); col++) {
out[col*rows + row] = StringUtil::stringToFloat(col_strings[col]);
@@ -389,7 +384,7 @@ mxArray* stringToMxArray(std::string input)
// split
std::vector<std::string> items;
- boost::split(items, input, boost::is_any_of(","));
+ StringUtil::splitString(items, input, ",");
// init matrix
mxArray* pVector = mxCreateDoubleMatrix(1, items.size(), mxREAL);
@@ -402,16 +397,13 @@ mxArray* stringToMxArray(std::string input)
return pVector;
}
- // number
- char* end;
- double content = ::strtod(input.c_str(), &end);
- bool isnumber = !*end;
- if (isnumber) {
- return mxCreateDoubleScalar(content);
+ try {
+ // number
+ return mxCreateDoubleScalar(StringUtil::stringToDouble(input));
+ } catch (const StringUtil::bad_cast &) {
+ // string
+ return mxCreateString(input.c_str());
}
-
- // string
- return mxCreateString(input.c_str());
}
//-----------------------------------------------------------------------------------------
// turn a c++ map into a matlab struct
diff --git a/python/astra/src/PythonPluginAlgorithm.cpp b/python/astra/src/PythonPluginAlgorithm.cpp
index 617c0f4..893db94 100644
--- a/python/astra/src/PythonPluginAlgorithm.cpp
+++ b/python/astra/src/PythonPluginAlgorithm.cpp
@@ -31,8 +31,6 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
#include "astra/Logging.h"
#include "astra/Utilities.h"
-#include <boost/algorithm/string.hpp>
-#include <boost/algorithm/string/split.hpp>
#include <iostream>
#include <fstream>
#include <string>
@@ -146,7 +144,7 @@ CPythonPluginAlgorithmFactory::~CPythonPluginAlgorithmFactory(){
PyObject * getClassFromString(std::string str){
std::vector<std::string> items;
- boost::split(items, str, boost::is_any_of("."));
+ StringUtil::splitString(items, str, ".");
PyObject *pyclass = PyImport_ImportModule(items[0].c_str());
if(pyclass==NULL){
logPythonError();
@@ -303,10 +301,10 @@ PyObject * pyStringFromString(std::string str){
PyObject* stringToPythonValue(std::string str){
if(str.find(";")!=std::string::npos){
std::vector<std::string> rows, row;
- boost::split(rows, str, boost::is_any_of(";"));
+ StringUtil::splitString(rows, str, ";");
PyObject *mat = PyList_New(rows.size());
for(unsigned int i=0; i<rows.size(); i++){
- boost::split(row, rows[i], boost::is_any_of(","));
+ StringUtil::splitString(row, rows[i], ",");
PyObject *rowlist = PyList_New(row.size());
for(unsigned int j=0;j<row.size();j++){
PyList_SetItem(rowlist, j, PyFloat_FromDouble(StringUtil::stringToDouble(row[j])));
@@ -317,7 +315,7 @@ PyObject* stringToPythonValue(std::string str){
}
if(str.find(",")!=std::string::npos){
std::vector<std::string> vec;
- boost::split(vec, str, boost::is_any_of(","));
+ StringUtil::splitString(vec, str, ",");
PyObject *veclist = PyList_New(vec.size());
for(unsigned int i=0;i<vec.size();i++){
PyList_SetItem(veclist, i, PyFloat_FromDouble(StringUtil::stringToDouble(vec[i])));
diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp
index 084ba8c..5879aec 100644
--- a/src/CompositeGeometryManager.cpp
+++ b/src/CompositeGeometryManager.cpp
@@ -146,6 +146,7 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div
newjob.eType = j->eType;
newjob.eMode = j->eMode;
newjob.pProjector = j->pProjector;
+ newjob.FDKSettings = j->FDKSettings;
CPart* input = job.pInput->reduce(outputPart.get());
@@ -196,6 +197,69 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div
return true;
}
+
+static std::pair<double, double> reduceProjectionVertical(const CVolumeGeometry3D* pVolGeom, const CProjectionGeometry3D* pProjGeom)
+{
+ double vmin_g, vmax_g;
+
+ // reduce self to only cover intersection with projection of VolumePart
+ // (Project corners of volume, take bounding box)
+
+ for (int i = 0; i < pProjGeom->getProjectionCount(); ++i) {
+
+ double vol_u[8];
+ double vol_v[8];
+
+ double pixx = pVolGeom->getPixelLengthX();
+ double pixy = pVolGeom->getPixelLengthY();
+ double pixz = pVolGeom->getPixelLengthZ();
+
+ // TODO: Is 0.5 sufficient?
+ double xmin = pVolGeom->getWindowMinX() - 0.5 * pixx;
+ double xmax = pVolGeom->getWindowMaxX() + 0.5 * pixx;
+ double ymin = pVolGeom->getWindowMinY() - 0.5 * pixy;
+ double ymax = pVolGeom->getWindowMaxY() + 0.5 * pixy;
+ double zmin = pVolGeom->getWindowMinZ() - 0.5 * pixz;
+ double zmax = pVolGeom->getWindowMaxZ() + 0.5 * pixz;
+
+ pProjGeom->projectPoint(xmin, ymin, zmin, i, vol_u[0], vol_v[0]);
+ pProjGeom->projectPoint(xmin, ymin, zmax, i, vol_u[1], vol_v[1]);
+ pProjGeom->projectPoint(xmin, ymax, zmin, i, vol_u[2], vol_v[2]);
+ pProjGeom->projectPoint(xmin, ymax, zmax, i, vol_u[3], vol_v[3]);
+ pProjGeom->projectPoint(xmax, ymin, zmin, i, vol_u[4], vol_v[4]);
+ pProjGeom->projectPoint(xmax, ymin, zmax, i, vol_u[5], vol_v[5]);
+ pProjGeom->projectPoint(xmax, ymax, zmin, i, vol_u[6], vol_v[6]);
+ pProjGeom->projectPoint(xmax, ymax, zmax, i, vol_u[7], vol_v[7]);
+
+ double vmin = vol_v[0];
+ double vmax = vol_v[0];
+
+ for (int j = 1; j < 8; ++j) {
+ if (vol_v[j] < vmin)
+ vmin = vol_v[j];
+ if (vol_v[j] > vmax)
+ vmax = vol_v[j];
+ }
+
+ if (i == 0 || vmin < vmin_g)
+ vmin_g = vmin;
+ if (i == 0 || vmax > vmax_g)
+ vmax_g = vmax;
+ }
+
+ if (vmin_g < -1.0)
+ vmin_g = -1.0;
+ if (vmax_g > pProjGeom->getDetectorRowCount())
+ vmax_g = pProjGeom->getDetectorRowCount();
+
+ if (vmin_g >= vmax_g)
+ vmin_g = vmax_g = 0.0;
+
+ return std::pair<double, double>(vmin_g, vmax_g);
+
+}
+
+
CCompositeGeometryManager::CPart::CPart(const CPart& other)
{
eType = other.eType;
@@ -236,119 +300,135 @@ size_t CCompositeGeometryManager::CPart::getSize()
}
+static bool testVolumeRange(const std::pair<double, double>& fullRange,
+ const CVolumeGeometry3D *pVolGeom,
+ const CProjectionGeometry3D *pProjGeom,
+ int zmin, int zmax)
+{
+ double pixz = pVolGeom->getPixelLengthZ();
+
+ CVolumeGeometry3D test(pVolGeom->getGridColCount(),
+ pVolGeom->getGridRowCount(),
+ zmax - zmin,
+ pVolGeom->getWindowMinX(),
+ pVolGeom->getWindowMinY(),
+ pVolGeom->getWindowMinZ() + zmin * pixz,
+ pVolGeom->getWindowMaxX(),
+ pVolGeom->getWindowMaxY(),
+ pVolGeom->getWindowMinZ() + zmax * pixz);
+
+
+ std::pair<double, double> subRange = reduceProjectionVertical(&test, pProjGeom);
+
+ // empty
+ if (subRange.first == subRange.second)
+ return true;
+
+ // fully outside of fullRange
+ if (subRange.first >= fullRange.second || subRange.second <= fullRange.first)
+ return true;
+
+ return false;
+}
+
CCompositeGeometryManager::CPart* CCompositeGeometryManager::CVolumePart::reduce(const CPart *_other)
{
const CProjectionPart *other = dynamic_cast<const CProjectionPart *>(_other);
assert(other);
- // TODO: Is 0.5 sufficient?
- double umin = -0.5;
- double umax = other->pGeom->getDetectorColCount() + 0.5;
- double vmin = -0.5;
- double vmax = other->pGeom->getDetectorRowCount() + 0.5;
-
- double uu[4];
- double vv[4];
- uu[0] = umin; vv[0] = vmin;
- uu[1] = umin; vv[1] = vmax;
- uu[2] = umax; vv[2] = vmin;
- uu[3] = umax; vv[3] = vmax;
-
- double pixx = pGeom->getPixelLengthX();
- double pixy = pGeom->getPixelLengthY();
- double pixz = pGeom->getPixelLengthZ();
- double xmin = pGeom->getWindowMinX() - 0.5 * pixx;
- double xmax = pGeom->getWindowMaxX() + 0.5 * pixx;
- double ymin = pGeom->getWindowMinY() - 0.5 * pixy;
- double ymax = pGeom->getWindowMaxY() + 0.5 * pixy;
-
- // NB: Flipped
- double zmax = pGeom->getWindowMinZ() - 2.5 * pixz;
- double zmin = pGeom->getWindowMaxZ() + 2.5 * pixz;
-
- // TODO: This isn't as tight as it could be.
- // In particular it won't detect the detector being
- // missed entirely on the u side.
-
- for (int i = 0; i < other->pGeom->getProjectionCount(); ++i) {
- for (int j = 0; j < 4; ++j) {
- double px, py, pz;
-
- other->pGeom->backprojectPointX(i, uu[j], vv[j], xmin, py, pz);
- //ASTRA_DEBUG("%f %f (%f - %f)", py, pz, ymin, ymax);
- if (pz < zmin) zmin = pz;
- if (pz > zmax) zmax = pz;
- other->pGeom->backprojectPointX(i, uu[j], vv[j], xmax, py, pz);
- //ASTRA_DEBUG("%f %f (%f - %f)", py, pz, ymin, ymax);
- if (pz < zmin) zmin = pz;
- if (pz > zmax) zmax = pz;
-
- other->pGeom->backprojectPointY(i, uu[j], vv[j], ymin, px, pz);
- //ASTRA_DEBUG("%f %f (%f - %f)", px, pz, xmin, xmax);
- if (pz < zmin) zmin = pz;
- if (pz > zmax) zmax = pz;
- other->pGeom->backprojectPointY(i, uu[j], vv[j], ymax, px, pz);
- //ASTRA_DEBUG("%f %f (%f - %f)", px, pz, xmin, xmax);
- if (pz < zmin) zmin = pz;
- if (pz > zmax) zmax = pz;
+ std::pair<double, double> fullRange = reduceProjectionVertical(pGeom, other->pGeom);
+
+ int top_slice = 0, bottom_slice = 0;
+
+ if (fullRange.first < fullRange.second) {
+
+
+ // TOP SLICE
+
+ int zmin = 0;
+ int zmax = pGeom->getGridSliceCount()-1; // (Don't try empty region)
+
+ // Setting top slice to zmin is always valid.
+
+ while (zmin < zmax) {
+ int zmid = (zmin + zmax + 1) / 2;
+
+ bool ok = testVolumeRange(fullRange, pGeom, other->pGeom,
+ 0, zmid);
+
+ ASTRA_DEBUG("binsearch min: [%d,%d], %d, %s", zmin, zmax, zmid, ok ? "ok" : "removed too much");
+
+ if (ok)
+ zmin = zmid;
+ else
+ zmax = zmid - 1;
}
- }
- //ASTRA_DEBUG("coord extent: %f - %f", zmin, zmax);
+ top_slice = zmin;
+
+
+ // BOTTOM SLICE
+
+ zmin = top_slice + 1; // (Don't try empty region)
+ zmax = pGeom->getGridSliceCount();
+
+ // Setting bottom slice to zmax is always valid
- // Clip both zmin and zmax to get rid of extreme (or infinite) values
- // NB: When individual pz values are +/-Inf, the sign is determined
- // by ray direction and on which side of the face the ray passes.
- if (zmin < pGeom->getWindowMinZ() - 2*pixz)
- zmin = pGeom->getWindowMinZ() - 2*pixz;
- if (zmin > pGeom->getWindowMaxZ() + 2*pixz)
- zmin = pGeom->getWindowMaxZ() + 2*pixz;
- if (zmax < pGeom->getWindowMinZ() - 2*pixz)
- zmax = pGeom->getWindowMinZ() - 2*pixz;
- if (zmax > pGeom->getWindowMaxZ() + 2*pixz)
- zmax = pGeom->getWindowMaxZ() + 2*pixz;
+ while (zmin < zmax) {
+ int zmid = (zmin + zmax) / 2;
- zmin = (zmin - pixz - pGeom->getWindowMinZ()) / pixz;
- zmax = (zmax + pixz - pGeom->getWindowMinZ()) / pixz;
+ bool ok = testVolumeRange(fullRange, pGeom, other->pGeom,
+ zmid, pGeom->getGridSliceCount());
- int _zmin = (int)floor(zmin);
- int _zmax = (int)ceil(zmax);
+ ASTRA_DEBUG("binsearch max: [%d,%d], %d, %s", zmin, zmax, zmid, ok ? "ok" : "removed too much");
- //ASTRA_DEBUG("index extent: %d - %d", _zmin, _zmax);
+ if (ok)
+ zmax = zmid;
+ else
+ zmin = zmid + 1;
- if (_zmin < 0)
- _zmin = 0;
- if (_zmax > pGeom->getGridSliceCount())
- _zmax = pGeom->getGridSliceCount();
+ }
+
+ bottom_slice = zmax;
- if (_zmax <= _zmin) {
- _zmin = _zmax = 0;
}
- //ASTRA_DEBUG("adjusted extent: %d - %d", _zmin, _zmax);
+
+ ASTRA_DEBUG("found extent: %d - %d", top_slice, bottom_slice);
+
+ top_slice -= 1;
+ if (top_slice < 0)
+ top_slice = 0;
+ bottom_slice += 1;
+ if (bottom_slice >= pGeom->getGridSliceCount())
+ bottom_slice = pGeom->getGridSliceCount();
+
+ ASTRA_DEBUG("adjusted extent: %d - %d", top_slice, bottom_slice);
+
+ double pixz = pGeom->getPixelLengthZ();
CVolumePart *sub = new CVolumePart();
sub->subX = this->subX;
sub->subY = this->subY;
- sub->subZ = this->subZ + _zmin;
+ sub->subZ = this->subZ + top_slice;
sub->pData = pData;
- if (_zmin == _zmax) {
+ if (top_slice == bottom_slice) {
sub->pGeom = 0;
} else {
sub->pGeom = new CVolumeGeometry3D(pGeom->getGridColCount(),
pGeom->getGridRowCount(),
- _zmax - _zmin,
+ bottom_slice - top_slice,
pGeom->getWindowMinX(),
pGeom->getWindowMinY(),
- pGeom->getWindowMinZ() + _zmin * pixz,
+ pGeom->getWindowMinZ() + top_slice * pixz,
pGeom->getWindowMaxX(),
pGeom->getWindowMaxY(),
- pGeom->getWindowMinZ() + _zmax * pixz);
+ pGeom->getWindowMinZ() + bottom_slice * pixz);
}
- ASTRA_DEBUG("Reduce volume from %d - %d to %d - %d", this->subZ, this->subZ + pGeom->getGridSliceCount(), this->subZ + _zmin, this->subZ + _zmax);
+ ASTRA_DEBUG("Reduce volume from %d - %d to %d - %d ( %f - %f )", this->subZ, this->subZ + pGeom->getGridSliceCount(), this->subZ + top_slice, this->subZ + bottom_slice, pGeom->getWindowMinZ() + top_slice * pixz, pGeom->getWindowMinZ() + bottom_slice * pixz);
return sub;
}
@@ -583,7 +663,9 @@ void CCompositeGeometryManager::CVolumePart::splitX(CCompositeGeometryManager::T
size_t m = std::min(maxSize / sliceSize, maxDim);
size_t blockSize = computeLinearSplit(m, div, sliceCount);
- int rem = sliceCount % blockSize;
+ int rem = blockSize - (sliceCount % blockSize);
+ if (rem == blockSize)
+ rem = 0;
ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize);
@@ -630,7 +712,9 @@ void CCompositeGeometryManager::CVolumePart::splitY(CCompositeGeometryManager::T
size_t m = std::min(maxSize / sliceSize, maxDim);
size_t blockSize = computeLinearSplit(m, div, sliceCount);
- int rem = sliceCount % blockSize;
+ int rem = blockSize - (sliceCount % blockSize);
+ if (rem == blockSize)
+ rem = 0;
ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize);
@@ -677,7 +761,9 @@ void CCompositeGeometryManager::CVolumePart::splitZ(CCompositeGeometryManager::T
size_t m = std::min(maxSize / sliceSize, maxDim);
size_t blockSize = computeLinearSplit(m, div, sliceCount);
- int rem = sliceCount % blockSize;
+ int rem = blockSize - (sliceCount % blockSize);
+ if (rem == blockSize)
+ rem = 0;
ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize);
@@ -742,62 +828,16 @@ void CCompositeGeometryManager::CProjectionPart::getDims(size_t &x, size_t &y, s
}
+
CCompositeGeometryManager::CPart* CCompositeGeometryManager::CProjectionPart::reduce(const CPart *_other)
{
const CVolumePart *other = dynamic_cast<const CVolumePart *>(_other);
assert(other);
- double vmin_g, vmax_g;
-
- // reduce self to only cover intersection with projection of VolumePart
- // (Project corners of volume, take bounding box)
-
- for (int i = 0; i < pGeom->getProjectionCount(); ++i) {
-
- double vol_u[8];
- double vol_v[8];
-
- double pixx = other->pGeom->getPixelLengthX();
- double pixy = other->pGeom->getPixelLengthY();
- double pixz = other->pGeom->getPixelLengthZ();
-
- // TODO: Is 0.5 sufficient?
- double xmin = other->pGeom->getWindowMinX() - 0.5 * pixx;
- double xmax = other->pGeom->getWindowMaxX() + 0.5 * pixx;
- double ymin = other->pGeom->getWindowMinY() - 0.5 * pixy;
- double ymax = other->pGeom->getWindowMaxY() + 0.5 * pixy;
- double zmin = other->pGeom->getWindowMinZ() - 0.5 * pixz;
- double zmax = other->pGeom->getWindowMaxZ() + 0.5 * pixz;
-
- pGeom->projectPoint(xmin, ymin, zmin, i, vol_u[0], vol_v[0]);
- pGeom->projectPoint(xmin, ymin, zmax, i, vol_u[1], vol_v[1]);
- pGeom->projectPoint(xmin, ymax, zmin, i, vol_u[2], vol_v[2]);
- pGeom->projectPoint(xmin, ymax, zmax, i, vol_u[3], vol_v[3]);
- pGeom->projectPoint(xmax, ymin, zmin, i, vol_u[4], vol_v[4]);
- pGeom->projectPoint(xmax, ymin, zmax, i, vol_u[5], vol_v[5]);
- pGeom->projectPoint(xmax, ymax, zmin, i, vol_u[6], vol_v[6]);
- pGeom->projectPoint(xmax, ymax, zmax, i, vol_u[7], vol_v[7]);
-
- double vmin = vol_v[0];
- double vmax = vol_v[0];
-
- for (int j = 1; j < 8; ++j) {
- if (vol_v[j] < vmin)
- vmin = vol_v[j];
- if (vol_v[j] > vmax)
- vmax = vol_v[j];
- }
-
- if (i == 0 || vmin < vmin_g)
- vmin_g = vmin;
- if (i == 0 || vmax > vmax_g)
- vmax_g = vmax;
- }
-
- // fprintf(stderr, "v extent: %f %f\n", vmin_g, vmax_g);
-
- int _vmin = (int)floor(vmin_g - 1.0f);
- int _vmax = (int)ceil(vmax_g + 1.0f);
+ std::pair<double, double> r = reduceProjectionVertical(other->pGeom, pGeom);
+ // fprintf(stderr, "v extent: %f %f\n", r.first, r.second);
+ int _vmin = (int)floor(r.first - 1.0);
+ int _vmax = (int)ceil(r.second + 1.0);
if (_vmin < 0)
_vmin = 0;
if (_vmax > pGeom->getDetectorRowCount())
@@ -837,7 +877,11 @@ void CCompositeGeometryManager::CProjectionPart::splitX(CCompositeGeometryManage
size_t m = std::min(maxSize / sliceSize, maxDim);
size_t blockSize = computeLinearSplit(m, div, sliceCount);
- int rem = sliceCount % blockSize;
+ int rem = blockSize - (sliceCount % blockSize);
+ if (rem == blockSize)
+ rem = 0;
+
+ ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize);
for (int x = -(rem / 2); x < sliceCount; x += blockSize) {
int newsubX = x;
@@ -879,7 +923,11 @@ void CCompositeGeometryManager::CProjectionPart::splitZ(CCompositeGeometryManage
size_t m = std::min(maxSize / sliceSize, maxDim);
size_t blockSize = computeLinearSplit(m, div, sliceCount);
- int rem = sliceCount % blockSize;
+ int rem = blockSize - (sliceCount % blockSize);
+ if (rem == blockSize)
+ rem = 0;
+
+ ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize);
for (int z = -(rem / 2); z < sliceCount; z += blockSize) {
int newsubZ = z;
@@ -992,6 +1040,27 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeDat
return doJobs(L);
}
+
+bool CCompositeGeometryManager::doFDK(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
+ CFloat32ProjectionData3DMemory *pProjData, bool bShortScan,
+ const float *pfFilter)
+{
+ if (!dynamic_cast<CConeProjectionGeometry3D*>(pProjData->getGeometry())) {
+ ASTRA_ERROR("CCompositeGeometryManager::doFDK: cone geometry required");
+ return false;
+ }
+
+ SJob job = createJobBP(pProjector, pVolData, pProjData);
+ job.eType = SJob::JOB_FDK;
+ job.FDKSettings.bShortScan = bShortScan;
+ job.FDKSettings.pfFilter = pfFilter;
+
+ TJobList L;
+ L.push_back(job);
+
+ return doJobs(L);
+}
+
bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData)
{
ASTRA_DEBUG("CCompositeGeometryManager::doFP, multi-volume");
@@ -1185,7 +1254,9 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter
ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims);
if (!ok) ASTRA_ERROR("Error copying input data to GPU");
- if (j.eType == CCompositeGeometryManager::SJob::JOB_FP) {
+ switch (j.eType) {
+ case CCompositeGeometryManager::SJob::JOB_FP:
+ {
assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pInput.get()));
assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pOutput.get()));
@@ -1194,7 +1265,10 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter
ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel);
if (!ok) ASTRA_ERROR("Error performing sub-FP");
ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done");
- } else if (j.eType == CCompositeGeometryManager::SJob::JOB_BP) {
+ }
+ break;
+ case CCompositeGeometryManager::SJob::JOB_BP:
+ {
assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get()));
assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get()));
@@ -1203,7 +1277,26 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter
ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling);
if (!ok) ASTRA_ERROR("Error performing sub-BP");
ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done");
- } else {
+ }
+ break;
+ case CCompositeGeometryManager::SJob::JOB_FDK:
+ {
+ assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get()));
+ assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get()));
+
+ if (srcdims.subx || srcdims.suby) {
+ ASTRA_ERROR("CCompositeGeometryManager::doJobs: data too large for FDK");
+ ok = false;
+ } else {
+ ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FDK");
+
+ ok = astraCUDA3d::FDK(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, j.FDKSettings.bShortScan, j.FDKSettings.pfFilter);
+ if (!ok) ASTRA_ERROR("Error performing sub-FDK");
+ ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FDK done");
+ }
+ }
+ break;
+ default:
assert(false);
}
diff --git a/src/CudaFDKAlgorithm3D.cpp b/src/CudaFDKAlgorithm3D.cpp
index bc15a3d..15abed4 100644
--- a/src/CudaFDKAlgorithm3D.cpp
+++ b/src/CudaFDKAlgorithm3D.cpp
@@ -32,6 +32,7 @@ $Id$
#include "astra/CudaProjector3D.h"
#include "astra/ConeProjectionGeometry3D.h"
+#include "astra/CompositeGeometryManager.h"
#include "astra/Logging.h"
@@ -84,6 +85,17 @@ bool CCudaFDKAlgorithm3D::_check()
const CProjectionGeometry3D* projgeom = m_pSinogram->getGeometry();
ASTRA_CONFIG_CHECK(dynamic_cast<const CConeProjectionGeometry3D*>(projgeom), "CUDA_FDK", "Error setting FDK geometry");
+
+ const CVolumeGeometry3D* volgeom = m_pReconstruction->getGeometry();
+ bool cube = true;
+ if (abs(volgeom->getPixelLengthX() / volgeom->getPixelLengthY() - 1.0) > 0.00001)
+ cube = false;
+ if (abs(volgeom->getPixelLengthX() / volgeom->getPixelLengthZ() - 1.0) > 0.00001)
+ cube = false;
+ ASTRA_CONFIG_CHECK(cube, "CUDA_FDK", "Voxels must be cubes for FDK");
+
+
+
return true;
}
@@ -219,20 +231,28 @@ void CCudaFDKAlgorithm3D::run(int _iNrIterations)
CFloat32VolumeData3DMemory* pReconMem = dynamic_cast<CFloat32VolumeData3DMemory*>(m_pReconstruction);
ASTRA_ASSERT(pReconMem);
-
- bool ok = true;
-
const float *filter = NULL;
- if(m_iFilterDataId!=-1){
- const CFloat32ProjectionData2D * pFilterData = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(m_iFilterDataId));
- filter = pFilterData->getDataConst();
+ if (m_iFilterDataId != -1) {
+ const CFloat32ProjectionData2D *pFilterData = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(m_iFilterDataId));
+ if (pFilterData)
+ filter = pFilterData->getDataConst();
}
+#if 0
+ bool ok = true;
+
ok = astraCudaFDK(pReconMem->getData(), pSinoMem->getDataConst(),
&volgeom, conegeom,
m_bShortScan, m_iGPUIndex, m_iVoxelSuperSampling, filter);
ASTRA_ASSERT(ok);
+#endif
+
+ CCompositeGeometryManager cgm;
+
+ cgm.doFDK(m_pProjector, pReconMem, pSinoMem, m_bShortScan, filter);
+
+
}
//----------------------------------------------------------------------------------------
diff --git a/src/Utilities.cpp b/src/Utilities.cpp
index c9740bf..8b0ca94 100644
--- a/src/Utilities.cpp
+++ b/src/Utilities.cpp
@@ -28,10 +28,6 @@ $Id$
#include "astra/Utilities.h"
-#include <boost/algorithm/string.hpp>
-#include <boost/algorithm/string/split.hpp>
-#include <boost/algorithm/string/classification.hpp>
-
#include <sstream>
#include <locale>
#include <iomanip>
@@ -84,18 +80,16 @@ std::vector<double> stringToDoubleVector(const std::string &s)
template<typename T>
std::vector<T> stringToVector(const std::string& s)
{
- // split
- std::vector<std::string> items;
- boost::split(items, s, boost::is_any_of(",;"));
-
- // init list
std::vector<T> out;
- out.resize(items.size());
+ size_t current = 0;
+ size_t next;
+ do {
+ next = s.find_first_of(",;", current);
+ std::string t = s.substr(current, next - current);
+ out.push_back(stringTo<T>(t));
+ current = next + 1;
+ } while (next != std::string::npos);
- // loop elements
- for (unsigned int i = 0; i < items.size(); i++) {
- out[i] = stringTo<T>(items[i]);
- }
return out;
}
@@ -120,6 +114,18 @@ std::string doubleToString(double f)
template<> std::string toString(float f) { return floatToString(f); }
template<> std::string toString(double f) { return doubleToString(f); }
+void splitString(std::vector<std::string> &items, const std::string& s,
+ const char *delim)
+{
+ items.clear();
+ size_t current = 0;
+ size_t next;
+ do {
+ next = s.find_first_of(",;", current);
+ items.push_back(s.substr(current, next - current));
+ current = next + 1;
+ } while (next != std::string::npos);
+}
}