summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Core/regularisers_GPU/Diffus_4thO_GPU_core.cu16
-rw-r--r--Core/regularisers_GPU/Diffus_4thO_GPU_core.h2
-rw-r--r--Core/regularisers_GPU/LLT_ROF_GPU_core.cu16
-rw-r--r--Core/regularisers_GPU/LLT_ROF_GPU_core.h2
-rw-r--r--Core/regularisers_GPU/NonlDiff_GPU_core.cu19
-rw-r--r--Core/regularisers_GPU/NonlDiff_GPU_core.h2
-rw-r--r--Core/regularisers_GPU/PatchSelect_GPU_core.cu28
-rw-r--r--Core/regularisers_GPU/PatchSelect_GPU_core.h2
-rw-r--r--Core/regularisers_GPU/TGV_GPU_core.cu17
-rw-r--r--Core/regularisers_GPU/TGV_GPU_core.h2
-rwxr-xr-xCore/regularisers_GPU/TV_FGP_GPU_core.cu32
-rwxr-xr-xCore/regularisers_GPU/TV_FGP_GPU_core.h2
-rwxr-xr-xCore/regularisers_GPU/TV_ROF_GPU_core.cu18
-rwxr-xr-xCore/regularisers_GPU/TV_ROF_GPU_core.h2
-rwxr-xr-xCore/regularisers_GPU/TV_SB_GPU_core.cu30
-rwxr-xr-xCore/regularisers_GPU/TV_SB_GPU_core.h2
-rw-r--r--Core/regularisers_GPU/dTV_FGP_GPU_core.cu33
-rw-r--r--Core/regularisers_GPU/dTV_FGP_GPU_core.h2
-rw-r--r--Core/regularisers_GPU/shared.h42
19 files changed, 86 insertions, 183 deletions
diff --git a/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu b/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
index 287fdc8..a4dbe70 100644
--- a/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
+++ b/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
@@ -18,6 +18,7 @@ limitations under the License.
*/
#include "Diffus_4thO_GPU_core.h"
+#include "shared.h"
/* CUDA implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
* The minimisation is performed using explicit scheme.
@@ -36,18 +37,6 @@ limitations under the License.
* [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
*/
-#define CHECK(call) \
-{ \
- const cudaError_t error = call; \
- if (error != cudaSuccess) \
- { \
- fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
- fprintf(stderr, "code: %d, reason: %s\n", error, \
- cudaGetErrorString(error)); \
- return; \
- } \
-}
-
#define BLKXSIZE 8
#define BLKYSIZE 8
#define BLKZSIZE 8
@@ -228,7 +217,7 @@ __global__ void Diffusion_update_step3D_kernel(float *Output, float *Input, floa
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
/********************* MAIN HOST FUNCTION ******************/
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z)
+extern "C" int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z)
{
int dimTotal, dev = 0;
CHECK(cudaSetDevice(dev));
@@ -275,4 +264,5 @@ extern "C" void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar,
CHECK(cudaFree(d_input));
CHECK(cudaFree(d_output));
CHECK(cudaFree(d_W_Lapl));
+ return 0;
}
diff --git a/Core/regularisers_GPU/Diffus_4thO_GPU_core.h b/Core/regularisers_GPU/Diffus_4thO_GPU_core.h
index 6314c1f..77d5d79 100644
--- a/Core/regularisers_GPU/Diffus_4thO_GPU_core.h
+++ b/Core/regularisers_GPU/Diffus_4thO_GPU_core.h
@@ -3,6 +3,6 @@
#include "CCPiDefines.h"
#include <stdio.h>
-extern "C" CCPI_EXPORT void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);
+extern "C" CCPI_EXPORT int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);
#endif
diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
index 3e41d64..87871be 100644
--- a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
+++ b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
@@ -18,6 +18,7 @@ limitations under the License.
*/
#include "LLT_ROF_GPU_core.h"
+#include "shared.h"
/* CUDA implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
*
@@ -40,18 +41,6 @@ limitations under the License.
* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
*/
-#define CHECK(call) \
-{ \
- const cudaError_t error = call; \
- if (error != cudaSuccess) \
- { \
- fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
- fprintf(stderr, "code: %d, reason: %s\n", error, \
- cudaGetErrorString(error)); \
- return; \
- } \
-}
-
#define BLKXSIZE 8
#define BLKYSIZE 8
#define BLKZSIZE 8
@@ -403,7 +392,7 @@ __global__ void Update3D_LLT_ROF_kernel(float *U0, float *U, float *D1_LLT, floa
/************************ HOST FUNCTION ****************************/
/*******************************************************************/
-extern "C" void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z)
+extern "C" int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z)
{
// set up device
int dev = 0;
@@ -480,4 +469,5 @@ extern "C" void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, f
CHECK(cudaFree(D1_ROF));
CHECK(cudaFree(D2_ROF));
CHECK(cudaFree(D3_ROF));
+ return 0;
}
diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.h b/Core/regularisers_GPU/LLT_ROF_GPU_core.h
index 4a19d09..a6bfcc7 100644
--- a/Core/regularisers_GPU/LLT_ROF_GPU_core.h
+++ b/Core/regularisers_GPU/LLT_ROF_GPU_core.h
@@ -3,6 +3,6 @@
#include "CCPiDefines.h"
#include <stdio.h>
-extern "C" CCPI_EXPORT void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);
+extern "C" CCPI_EXPORT int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);
#endif
diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.cu b/Core/regularisers_GPU/NonlDiff_GPU_core.cu
index f8176eb..ff7ce4d 100644
--- a/Core/regularisers_GPU/NonlDiff_GPU_core.cu
+++ b/Core/regularisers_GPU/NonlDiff_GPU_core.cu
@@ -18,6 +18,7 @@ limitations under the License.
*/
#include "NonlDiff_GPU_core.h"
+#include "shared.h"
/* CUDA implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case)
* The minimisation is performed using explicit scheme.
@@ -38,18 +39,7 @@ limitations under the License.
* [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
*/
-#define CHECK(call) \
-{ \
- const cudaError_t error = call; \
- if (error != cudaSuccess) \
- { \
- fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
- fprintf(stderr, "code: %d, reason: %s\n", error, \
- cudaGetErrorString(error)); \
- return; \
- } \
-}
-
+
#define BLKXSIZE 8
#define BLKYSIZE 8
#define BLKZSIZE 8
@@ -295,7 +285,7 @@ __global__ void NonLinearDiff3D_kernel(float *Input, float *Output, float lambda
/////////////////////////////////////////////////
// HOST FUNCTION
-extern "C" void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z)
+extern "C" int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z)
{
// set up device
int dev = 0;
@@ -350,5 +340,6 @@ extern "C" void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar,
CHECK(cudaMemcpy(Output,d_output,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost));
CHECK(cudaFree(d_input));
CHECK(cudaFree(d_output));
- //cudaDeviceReset();
+ //cudaDeviceReset();
+ return 0;
}
diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.h b/Core/regularisers_GPU/NonlDiff_GPU_core.h
index afd712b..5fe457e 100644
--- a/Core/regularisers_GPU/NonlDiff_GPU_core.h
+++ b/Core/regularisers_GPU/NonlDiff_GPU_core.h
@@ -3,6 +3,6 @@
#include "CCPiDefines.h"
#include <stdio.h>
-extern "C" CCPI_EXPORT void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
+extern "C" CCPI_EXPORT int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
#endif
diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/Core/regularisers_GPU/PatchSelect_GPU_core.cu
index 28d0385..74f59ca 100644
--- a/Core/regularisers_GPU/PatchSelect_GPU_core.cu
+++ b/Core/regularisers_GPU/PatchSelect_GPU_core.cu
@@ -19,6 +19,7 @@
*/
#include "PatchSelect_GPU_core.h"
+#include "shared.h"
/* CUDA implementation of non-local weight pre-calculation for non-local priors
* Weights and associated indices are stored into pre-allocated arrays and passed
@@ -38,30 +39,6 @@
* 3. Weights_ij - associated weights
*/
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(call) \
-{ \
- const cudaError_t error = call; \
- if (error != cudaSuccess) \
- { \
- fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
- fprintf(stderr, "code: %d, reason: %s\n", error, \
- cudaGetErrorString(error)); \
- return; \
- } \
-}
-
-/*#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
- if (cudaSuccess != err)
- {
- fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
- file, line, (int)err, cudaGetErrorString(err));
- return;
- }
-}*/
#define BLKXSIZE 16
#define BLKYSIZE 16
@@ -414,7 +391,7 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
/********************* MAIN HOST FUNCTION ******************/
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
+extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
{
int deviceCount = -1; // number of devices
cudaGetDeviceCount(&deviceCount);
@@ -477,4 +454,5 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho
cudaFree(H_j_d);
cudaFree(Weights_d);
cudaFree(Eucl_Vec_d);
+ return 0;
}
diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.h b/Core/regularisers_GPU/PatchSelect_GPU_core.h
index d20fe9f..8c124d3 100644
--- a/Core/regularisers_GPU/PatchSelect_GPU_core.h
+++ b/Core/regularisers_GPU/PatchSelect_GPU_core.h
@@ -3,6 +3,6 @@
#include "CCPiDefines.h"
#include <stdio.h>
-extern "C" CCPI_EXPORT void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);
+extern "C" CCPI_EXPORT int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);
#endif
diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu
index 09a4ec5..73232a9 100644
--- a/Core/regularisers_GPU/TGV_GPU_core.cu
+++ b/Core/regularisers_GPU/TGV_GPU_core.cu
@@ -18,6 +18,7 @@ limitations under the License.
*/
#include "TGV_GPU_core.h"
+#include "shared.h"
/* CUDA implementation of Primal-Dual denoising method for
* Total Generilized Variation (TGV)-L2 model [1] (2D case only)
@@ -36,19 +37,6 @@ limitations under the License.
* References:
* [1] K. Bredies "Total Generalized Variation"
*/
-
-#define CHECK(call) \
-{ \
- const cudaError_t error = call; \
- if (error != cudaSuccess) \
- { \
- fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
- fprintf(stderr, "code: %d, reason: %s\n", error, \
- cudaGetErrorString(error)); \
- return; \
- } \
-}
-
#define BLKXSIZE2D 16
#define BLKYSIZE2D 16
@@ -239,7 +227,7 @@ __global__ void newU_kernel(float *U, float *U_old, int N, int M, int num_total)
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
/********************* MAIN HOST FUNCTION ******************/
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY)
+extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY)
{
int dimTotal, dev = 0;
CHECK(cudaSetDevice(dev));
@@ -320,4 +308,5 @@ extern "C" void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, fl
CHECK(cudaFree(V2));
CHECK(cudaFree(V1_old));
CHECK(cudaFree(V2_old));
+ return 0;
}
diff --git a/Core/regularisers_GPU/TGV_GPU_core.h b/Core/regularisers_GPU/TGV_GPU_core.h
index 663378f..5a4eb76 100644
--- a/Core/regularisers_GPU/TGV_GPU_core.h
+++ b/Core/regularisers_GPU/TGV_GPU_core.h
@@ -3,6 +3,6 @@
#include "CCPiDefines.h"
#include <stdio.h>
-extern "C" CCPI_EXPORT void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY);
+extern "C" CCPI_EXPORT int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY);
#endif
diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/Core/regularisers_GPU/TV_FGP_GPU_core.cu
index bde3afb..b371c5d 100755
--- a/Core/regularisers_GPU/TV_FGP_GPU_core.cu
+++ b/Core/regularisers_GPU/TV_FGP_GPU_core.cu
@@ -18,6 +18,7 @@ limitations under the License.
*/
#include "TV_FGP_GPU_core.h"
+#include "shared.h"
#include <thrust/device_vector.h>
#include <thrust/transform_reduce.h>
@@ -39,30 +40,6 @@ limitations under the License.
* [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
*/
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(call) \
-{ \
- const cudaError_t error = call; \
- if (error != cudaSuccess) \
- { \
- fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
- fprintf(stderr, "code: %d, reason: %s\n", error, \
- cudaGetErrorString(error)); \
- return; \
- } \
-}
-
-/*#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
- if (cudaSuccess != err)
- {
- fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
- file, line, (int)err, cudaGetErrorString(err));
- return;
- }
-}*/
#define BLKXSIZE2D 16
#define BLKYSIZE2D 16
@@ -366,13 +343,13 @@ __global__ void FGPResidCalc3D_kernel(float *Input1, float *Input2, float* Outpu
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
////////////MAIN HOST FUNCTION ///////////////
-extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
{
int deviceCount = -1; // number of devices
cudaGetDeviceCount(&deviceCount);
if (deviceCount == 0) {
fprintf(stderr, "No CUDA devices found\n");
- return;
+ return -1;
}
int count = 0, i;
@@ -582,5 +559,6 @@ extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, in
cudaFree(R2);
cudaFree(R3);
}
- //cudaDeviceReset();
+ //cudaDeviceReset();
+ return 0;
}
diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.h b/Core/regularisers_GPU/TV_FGP_GPU_core.h
index 107d243..b28cdf3 100755
--- a/Core/regularisers_GPU/TV_FGP_GPU_core.h
+++ b/Core/regularisers_GPU/TV_FGP_GPU_core.h
@@ -5,6 +5,6 @@
#ifndef _TV_FGP_GPU_
#define _TV_FGP_GPU_
-extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
#endif
diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/Core/regularisers_GPU/TV_ROF_GPU_core.cu
index 5ae3b6e..76f5be9 100755
--- a/Core/regularisers_GPU/TV_ROF_GPU_core.cu
+++ b/Core/regularisers_GPU/TV_ROF_GPU_core.cu
@@ -35,18 +35,7 @@ limitations under the License.
*
* D. Kazantsev, 2016-18
*/
-
-#define CHECK(call) \
-{ \
- const cudaError_t error = call; \
- if (error != cudaSuccess) \
- { \
- fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
- fprintf(stderr, "code: %d, reason: %s\n", error, \
- cudaGetErrorString(error)); \
- return; \
- } \
-}
+#include "shared.h"
#define BLKXSIZE 8
#define BLKYSIZE 8
@@ -304,7 +293,7 @@ __host__ __device__ int sign (float x)
/////////////////////////////////////////////////
// HOST FUNCTION
-extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z)
+extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z)
{
// set up device
int dev = 0;
@@ -364,5 +353,6 @@ extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, in
CHECK(cudaFree(d_update));
CHECK(cudaFree(d_D1));
CHECK(cudaFree(d_D2));
- //cudaDeviceReset();
+ //cudaDeviceReset();
+ return 0;
}
diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.h b/Core/regularisers_GPU/TV_ROF_GPU_core.h
index d772aba..3a09296 100755
--- a/Core/regularisers_GPU/TV_ROF_GPU_core.h
+++ b/Core/regularisers_GPU/TV_ROF_GPU_core.h
@@ -3,6 +3,6 @@
#include "CCPiDefines.h"
#include <stdio.h>
-extern "C" CCPI_EXPORT void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
+extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
#endif
diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.cu b/Core/regularisers_GPU/TV_SB_GPU_core.cu
index a590981..8c66323 100755
--- a/Core/regularisers_GPU/TV_SB_GPU_core.cu
+++ b/Core/regularisers_GPU/TV_SB_GPU_core.cu
@@ -39,29 +39,6 @@ limitations under the License.
*/
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(call) \
-{ \
- const cudaError_t error = call; \
- if (error != cudaSuccess) \
- { \
- fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
- fprintf(stderr, "code: %d, reason: %s\n", error, \
- cudaGetErrorString(error)); \
- return; \
- } \
-}
-
-/*#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
- if (cudaSuccess != err)
- {
- fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
- file, line, (int)err, cudaGetErrorString(err));
- return;
- }
-}*/
#define BLKXSIZE2D 16
#define BLKYSIZE2D 16
@@ -375,13 +352,13 @@ __global__ void SBResidCalc3D_kernel(float *Input1, float *Input2, float* Output
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
/********************* MAIN HOST FUNCTION ******************/
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ)
+extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ)
{
int deviceCount = -1; // number of devices
cudaGetDeviceCount(&deviceCount);
if (deviceCount == 0) {
fprintf(stderr, "No CUDA devices found\n");
- return;
+ return -1;
}
int ll, DimTotal;
@@ -569,5 +546,6 @@ extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter,
cudaFree(By);
cudaFree(Bz);
}
- //cudaDeviceReset();
+ //cudaDeviceReset();
+ return 0;
}
diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.h b/Core/regularisers_GPU/TV_SB_GPU_core.h
index bdc9219..d44ab77 100755
--- a/Core/regularisers_GPU/TV_SB_GPU_core.h
+++ b/Core/regularisers_GPU/TV_SB_GPU_core.h
@@ -5,6 +5,6 @@
#ifndef _SB_TV_GPU_
#define _SB_TV_GPU_
-extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
+extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
#endif
diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
index e2c6ecf..0bc3ff0 100644
--- a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
+++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
@@ -16,7 +16,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
-
+#include "shared.h"
#include "dTV_FGP_GPU_core.h"
#include <thrust/device_vector.h>
#include <thrust/transform_reduce.h>
@@ -45,30 +45,6 @@ limitations under the License.
*/
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(call) \
-{ \
- const cudaError_t error = call; \
- if (error != cudaSuccess) \
- { \
- fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
- fprintf(stderr, "code: %d, reason: %s\n", error, \
- cudaGetErrorString(error)); \
- return; \
- } \
-}
-/*#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
- if (cudaSuccess != err)
- {
- fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
- file, line, (int)err, cudaGetErrorString(err));
- return;
- }
-}
-*/
#define BLKXSIZE2D 16
#define BLKYSIZE2D 16
@@ -479,7 +455,7 @@ __global__ void dTVnonneg3D_kernel(float* Output, int N, int M, int Z, int num_t
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
////////////MAIN HOST FUNCTION ///////////////
-extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
{
int deviceCount = -1; // number of devices
cudaGetDeviceCount(&deviceCount);
@@ -759,6 +735,7 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f
cudaFree(InputRef_y);
cudaFree(InputRef_z);
cudaFree(d_InputRef);
- }
- //cudaDeviceReset();
+ }
+ //cudaDeviceReset();
+ return 0;
}
diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.h b/Core/regularisers_GPU/dTV_FGP_GPU_core.h
index b906636..9020b1a 100644
--- a/Core/regularisers_GPU/dTV_FGP_GPU_core.h
+++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.h
@@ -5,6 +5,6 @@
#ifndef _dTV_FGP_GPU_
#define _dTV_FGP_GPU_
-extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
#endif
diff --git a/Core/regularisers_GPU/shared.h b/Core/regularisers_GPU/shared.h
new file mode 100644
index 0000000..fe98cd6
--- /dev/null
+++ b/Core/regularisers_GPU/shared.h
@@ -0,0 +1,42 @@
+/*shared macros*/
+
+
+/*checks CUDA call, should be used in functions returning <int> value
+if error happens, writes to standard error and explicitly returns -1*/
+#define CHECK(call) \
+{ \
+ const cudaError_t error = call; \
+ if (error != cudaSuccess) \
+ { \
+ fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
+ fprintf(stderr, "code: %d, reason: %s\n", error, \
+ cudaGetErrorString(error)); \
+ return -1; \
+ } \
+}
+
+// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
+#define checkCudaErrors(call) \
+{ \
+ const cudaError_t error = call; \
+ if (error != cudaSuccess) \
+ { \
+ fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
+ fprintf(stderr, "code: %d, reason: %s\n", error, \
+ cudaGetErrorString(error)); \
+ return -1; \
+ } \
+}
+/*#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
+
+inline void __checkCudaErrors(cudaError err, const char *file, const int line)
+{
+ if (cudaSuccess != err)
+ {
+ fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
+ file, line, (int)err, cudaGetErrorString(err));
+ return;
+ }
+}
+*/
+