From 3c0c441a9a6b24a02e10db8c8eda14bb8b3f2150 Mon Sep 17 00:00:00 2001
From: TomasKulhanek <tomas.kulhanek@stfc.ac.uk>
Date: Mon, 17 Dec 2018 09:21:36 +0000
Subject: UPDATE: shared CHECK cuda macros and return int

---
 Core/regularisers_GPU/Diffus_4thO_GPU_core.cu | 16 ++--------
 Core/regularisers_GPU/Diffus_4thO_GPU_core.h  |  2 +-
 Core/regularisers_GPU/LLT_ROF_GPU_core.cu     | 16 ++--------
 Core/regularisers_GPU/LLT_ROF_GPU_core.h      |  2 +-
 Core/regularisers_GPU/NonlDiff_GPU_core.cu    | 19 ++++--------
 Core/regularisers_GPU/NonlDiff_GPU_core.h     |  2 +-
 Core/regularisers_GPU/PatchSelect_GPU_core.cu | 28 ++----------------
 Core/regularisers_GPU/PatchSelect_GPU_core.h  |  2 +-
 Core/regularisers_GPU/TGV_GPU_core.cu         | 17 ++---------
 Core/regularisers_GPU/TGV_GPU_core.h          |  2 +-
 Core/regularisers_GPU/TV_FGP_GPU_core.cu      | 32 ++++----------------
 Core/regularisers_GPU/TV_FGP_GPU_core.h       |  2 +-
 Core/regularisers_GPU/TV_ROF_GPU_core.cu      | 18 +++---------
 Core/regularisers_GPU/TV_ROF_GPU_core.h       |  2 +-
 Core/regularisers_GPU/TV_SB_GPU_core.cu       | 30 +++----------------
 Core/regularisers_GPU/TV_SB_GPU_core.h        |  2 +-
 Core/regularisers_GPU/dTV_FGP_GPU_core.cu     | 33 ++++-----------------
 Core/regularisers_GPU/dTV_FGP_GPU_core.h      |  2 +-
 Core/regularisers_GPU/shared.h                | 42 +++++++++++++++++++++++++++
 19 files changed, 86 insertions(+), 183 deletions(-)
 create mode 100644 Core/regularisers_GPU/shared.h

diff --git a/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu b/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
index 287fdc8..a4dbe70 100644
--- a/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
+++ b/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
@@ -18,6 +18,7 @@ limitations under the License.
 */ 
 
 #include "Diffus_4thO_GPU_core.h"
+#include "shared.h"
 
 /* CUDA implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
  * The minimisation is performed using explicit scheme. 
@@ -36,18 +37,6 @@ limitations under the License.
  * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
  */
 
-#define CHECK(call)                                                            \
-{                                                                              \
-    const cudaError_t error = call;                                            \
-    if (error != cudaSuccess)                                                  \
-    {                                                                          \
-        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
-        fprintf(stderr, "code: %d, reason: %s\n", error,                       \
-                cudaGetErrorString(error));                                    \
-        return;                                                                \
-    }                                                                          \
-}
-    
 #define BLKXSIZE 8
 #define BLKYSIZE 8
 #define BLKZSIZE 8
@@ -228,7 +217,7 @@ __global__ void Diffusion_update_step3D_kernel(float *Output, float *Input, floa
 /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
 /********************* MAIN HOST FUNCTION ******************/
 /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z)
+extern "C" int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z)
 {
 		int dimTotal, dev = 0;
 		CHECK(cudaSetDevice(dev));
@@ -275,4 +264,5 @@ extern "C" void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar,
         CHECK(cudaFree(d_input));
         CHECK(cudaFree(d_output));
         CHECK(cudaFree(d_W_Lapl));
+        return 0;
 }
diff --git a/Core/regularisers_GPU/Diffus_4thO_GPU_core.h b/Core/regularisers_GPU/Diffus_4thO_GPU_core.h
index 6314c1f..77d5d79 100644
--- a/Core/regularisers_GPU/Diffus_4thO_GPU_core.h
+++ b/Core/regularisers_GPU/Diffus_4thO_GPU_core.h
@@ -3,6 +3,6 @@
 #include "CCPiDefines.h"
 #include <stdio.h>
 
-extern "C" CCPI_EXPORT void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);
+extern "C" CCPI_EXPORT int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);
 
 #endif 
diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
index 3e41d64..87871be 100644
--- a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
+++ b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
@@ -18,6 +18,7 @@ limitations under the License.
 */ 
 
 #include "LLT_ROF_GPU_core.h"
+#include "shared.h"
 
 /* CUDA implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
  * 
@@ -40,18 +41,6 @@ limitations under the License.
 * [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
 */
 
-#define CHECK(call)                                                            \
-{                                                                              \
-    const cudaError_t error = call;                                            \
-    if (error != cudaSuccess)                                                  \
-    {                                                                          \
-        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
-        fprintf(stderr, "code: %d, reason: %s\n", error,                       \
-                cudaGetErrorString(error));                                    \
-        return;                                                               \
-    }                                                                          \
-}
-    
 #define BLKXSIZE 8
 #define BLKYSIZE 8
 #define BLKZSIZE 8
@@ -403,7 +392,7 @@ __global__ void Update3D_LLT_ROF_kernel(float *U0, float *U, float *D1_LLT, floa
 /************************ HOST FUNCTION ****************************/
 /*******************************************************************/
 
-extern "C" void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z)
+extern "C" int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z)
 {
 	    // set up device
 		int dev = 0;
@@ -480,4 +469,5 @@ extern "C" void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, f
         CHECK(cudaFree(D1_ROF));
         CHECK(cudaFree(D2_ROF));
         CHECK(cudaFree(D3_ROF));
+        return 0;
 }
diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.h b/Core/regularisers_GPU/LLT_ROF_GPU_core.h
index 4a19d09..a6bfcc7 100644
--- a/Core/regularisers_GPU/LLT_ROF_GPU_core.h
+++ b/Core/regularisers_GPU/LLT_ROF_GPU_core.h
@@ -3,6 +3,6 @@
 #include "CCPiDefines.h"
 #include <stdio.h>
 
-extern "C" CCPI_EXPORT void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);
+extern "C" CCPI_EXPORT int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);
 
 #endif 
diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.cu b/Core/regularisers_GPU/NonlDiff_GPU_core.cu
index f8176eb..ff7ce4d 100644
--- a/Core/regularisers_GPU/NonlDiff_GPU_core.cu
+++ b/Core/regularisers_GPU/NonlDiff_GPU_core.cu
@@ -18,6 +18,7 @@ limitations under the License.
 */ 
 
 #include "NonlDiff_GPU_core.h"
+#include "shared.h"
 
 /* CUDA implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case)
  * The minimisation is performed using explicit scheme. 
@@ -38,18 +39,7 @@ limitations under the License.
  * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
  */
 
-#define CHECK(call)                                                            \
-{                                                                              \
-    const cudaError_t error = call;                                            \
-    if (error != cudaSuccess)                                                  \
-    {                                                                          \
-        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
-        fprintf(stderr, "code: %d, reason: %s\n", error,                       \
-                cudaGetErrorString(error));                                    \
-        return;                                                               \
-    }                                                                          \
-}
-    
+
 #define BLKXSIZE 8
 #define BLKYSIZE 8
 #define BLKZSIZE 8
@@ -295,7 +285,7 @@ __global__ void NonLinearDiff3D_kernel(float *Input, float *Output, float lambda
 
 /////////////////////////////////////////////////
 // HOST FUNCTION
-extern "C" void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z)
+extern "C" int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z)
 {
 	    // set up device
 		int dev = 0;
@@ -350,5 +340,6 @@ extern "C" void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar,
         CHECK(cudaMemcpy(Output,d_output,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost));
         CHECK(cudaFree(d_input));
         CHECK(cudaFree(d_output));
-        //cudaDeviceReset(); 
+        //cudaDeviceReset();
+        return 0;
 }
diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.h b/Core/regularisers_GPU/NonlDiff_GPU_core.h
index afd712b..5fe457e 100644
--- a/Core/regularisers_GPU/NonlDiff_GPU_core.h
+++ b/Core/regularisers_GPU/NonlDiff_GPU_core.h
@@ -3,6 +3,6 @@
 #include "CCPiDefines.h"
 #include <stdio.h>
 
-extern "C" CCPI_EXPORT void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
+extern "C" CCPI_EXPORT int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
 
 #endif 
diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/Core/regularisers_GPU/PatchSelect_GPU_core.cu
index 28d0385..74f59ca 100644
--- a/Core/regularisers_GPU/PatchSelect_GPU_core.cu
+++ b/Core/regularisers_GPU/PatchSelect_GPU_core.cu
@@ -19,6 +19,7 @@
  */
 
 #include "PatchSelect_GPU_core.h"
+#include "shared.h"
 
 /* CUDA implementation of non-local weight pre-calculation for non-local priors
  * Weights and associated indices are stored into pre-allocated arrays and passed
@@ -38,30 +39,6 @@
  * 3. Weights_ij - associated weights
  */
 
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(call)                                                            \
-{                                                                              \
-    const cudaError_t error = call;                                            \
-    if (error != cudaSuccess)                                                  \
-    {                                                                          \
-        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
-        fprintf(stderr, "code: %d, reason: %s\n", error,                       \
-                cudaGetErrorString(error));                                    \
-        return;                                                                \
-    }                                                                          \
-}
-
-/*#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
-    if (cudaSuccess != err)
-    {
-        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
-                file, line, (int)err, cudaGetErrorString(err));
-        return;
-    }
-}*/
 
 #define BLKXSIZE 16
 #define BLKYSIZE 16
@@ -414,7 +391,7 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign
 /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
 /********************* MAIN HOST FUNCTION ******************/
 /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
+extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
 {
     int deviceCount = -1; // number of devices
     cudaGetDeviceCount(&deviceCount);
@@ -477,4 +454,5 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho
     cudaFree(H_j_d);    
     cudaFree(Weights_d);    
     cudaFree(Eucl_Vec_d);
+    return 0;
 }
diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.h b/Core/regularisers_GPU/PatchSelect_GPU_core.h
index d20fe9f..8c124d3 100644
--- a/Core/regularisers_GPU/PatchSelect_GPU_core.h
+++ b/Core/regularisers_GPU/PatchSelect_GPU_core.h
@@ -3,6 +3,6 @@
 #include "CCPiDefines.h"
 #include <stdio.h>
 
-extern "C" CCPI_EXPORT void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);
+extern "C" CCPI_EXPORT int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);
 
 #endif 
diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu
index 09a4ec5..73232a9 100644
--- a/Core/regularisers_GPU/TGV_GPU_core.cu
+++ b/Core/regularisers_GPU/TGV_GPU_core.cu
@@ -18,6 +18,7 @@ limitations under the License.
 */ 
 
 #include "TGV_GPU_core.h"
+#include "shared.h"
 
 /* CUDA implementation of Primal-Dual denoising method for 
  * Total Generilized Variation (TGV)-L2 model [1] (2D case only)
@@ -36,19 +37,6 @@ limitations under the License.
  * References:
  * [1] K. Bredies "Total Generalized Variation"
  */
-
-#define CHECK(call)                                                            \
-{                                                                              \
-    const cudaError_t error = call;                                            \
-    if (error != cudaSuccess)                                                  \
-    {                                                                          \
-        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
-        fprintf(stderr, "code: %d, reason: %s\n", error,                       \
-                cudaGetErrorString(error));                                    \
-        return;                                                               \
-    }                                                                          \
-}
-    
     
 #define BLKXSIZE2D 16
 #define BLKYSIZE2D 16
@@ -239,7 +227,7 @@ __global__ void newU_kernel(float *U, float *U_old, int N, int M, int num_total)
 /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
 /********************* MAIN HOST FUNCTION ******************/
 /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY)
+extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY)
 {
 		int dimTotal, dev = 0;
 		CHECK(cudaSetDevice(dev));
@@ -320,4 +308,5 @@ extern "C" void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, fl
         CHECK(cudaFree(V2));
         CHECK(cudaFree(V1_old));
         CHECK(cudaFree(V2_old));
+        return 0;
 }
diff --git a/Core/regularisers_GPU/TGV_GPU_core.h b/Core/regularisers_GPU/TGV_GPU_core.h
index 663378f..5a4eb76 100644
--- a/Core/regularisers_GPU/TGV_GPU_core.h
+++ b/Core/regularisers_GPU/TGV_GPU_core.h
@@ -3,6 +3,6 @@
 #include "CCPiDefines.h"
 #include <stdio.h>
 
-extern "C" CCPI_EXPORT void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY);
+extern "C" CCPI_EXPORT int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY);
 
 #endif 
diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/Core/regularisers_GPU/TV_FGP_GPU_core.cu
index bde3afb..b371c5d 100755
--- a/Core/regularisers_GPU/TV_FGP_GPU_core.cu
+++ b/Core/regularisers_GPU/TV_FGP_GPU_core.cu
@@ -18,6 +18,7 @@ limitations under the License.
 */ 
 
 #include "TV_FGP_GPU_core.h"
+#include "shared.h"
 #include <thrust/device_vector.h>
 #include <thrust/transform_reduce.h>
 
@@ -39,30 +40,6 @@ limitations under the License.
  * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
  */
 
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(call)                                                            \
-{                                                                              \
-    const cudaError_t error = call;                                            \
-    if (error != cudaSuccess)                                                  \
-    {                                                                          \
-        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
-        fprintf(stderr, "code: %d, reason: %s\n", error,                       \
-                cudaGetErrorString(error));                                    \
-        return;                                                                \
-    }                                                                          \
-}
-
-/*#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
-    if (cudaSuccess != err)
-    {
-        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
-                file, line, (int)err, cudaGetErrorString(err));
-        return;
-    }
-}*/
 
 #define BLKXSIZE2D 16
 #define BLKYSIZE2D 16
@@ -366,13 +343,13 @@ __global__ void FGPResidCalc3D_kernel(float *Input1, float *Input2, float* Outpu
 /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
 
 ////////////MAIN HOST FUNCTION ///////////////
-extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
 {
     int deviceCount = -1; // number of devices
     cudaGetDeviceCount(&deviceCount);
     if (deviceCount == 0) {
         fprintf(stderr, "No CUDA devices found\n");
-        return;
+        return -1;
     }
     
     int count = 0, i;
@@ -582,5 +559,6 @@ extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, in
             cudaFree(R2);        
             cudaFree(R3);        
     } 
-    //cudaDeviceReset(); 
+    //cudaDeviceReset();
+    return 0;
 }
diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.h b/Core/regularisers_GPU/TV_FGP_GPU_core.h
index 107d243..b28cdf3 100755
--- a/Core/regularisers_GPU/TV_FGP_GPU_core.h
+++ b/Core/regularisers_GPU/TV_FGP_GPU_core.h
@@ -5,6 +5,6 @@
 #ifndef _TV_FGP_GPU_
 #define _TV_FGP_GPU_
 
-extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); 
+extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
 
 #endif 
diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/Core/regularisers_GPU/TV_ROF_GPU_core.cu
index 5ae3b6e..76f5be9 100755
--- a/Core/regularisers_GPU/TV_ROF_GPU_core.cu
+++ b/Core/regularisers_GPU/TV_ROF_GPU_core.cu
@@ -35,18 +35,7 @@ limitations under the License.
 *
 * D. Kazantsev, 2016-18
 */
-
-#define CHECK(call)                                                            \
-{                                                                              \
-    const cudaError_t error = call;                                            \
-    if (error != cudaSuccess)                                                  \
-    {                                                                          \
-        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
-        fprintf(stderr, "code: %d, reason: %s\n", error,                       \
-                cudaGetErrorString(error));                                    \
-        return;                                                             \
-    }                                                                          \
-}
+#include "shared.h"
     
 #define BLKXSIZE 8
 #define BLKYSIZE 8
@@ -304,7 +293,7 @@ __host__ __device__ int sign (float x)
 
 /////////////////////////////////////////////////
 // HOST FUNCTION
-extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z)
+extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z)
 {
 	    // set up device
 		int dev = 0;
@@ -364,5 +353,6 @@ extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, in
         CHECK(cudaFree(d_update));
         CHECK(cudaFree(d_D1));
         CHECK(cudaFree(d_D2));        
-        //cudaDeviceReset(); 
+        //cudaDeviceReset();
+        return 0;
 }
diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.h b/Core/regularisers_GPU/TV_ROF_GPU_core.h
index d772aba..3a09296 100755
--- a/Core/regularisers_GPU/TV_ROF_GPU_core.h
+++ b/Core/regularisers_GPU/TV_ROF_GPU_core.h
@@ -3,6 +3,6 @@
 #include "CCPiDefines.h"
 #include <stdio.h>
 
-extern "C" CCPI_EXPORT void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
+extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
 
 #endif 
diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.cu b/Core/regularisers_GPU/TV_SB_GPU_core.cu
index a590981..8c66323 100755
--- a/Core/regularisers_GPU/TV_SB_GPU_core.cu
+++ b/Core/regularisers_GPU/TV_SB_GPU_core.cu
@@ -39,29 +39,6 @@ limitations under the License.
 */
 
 // This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(call)                                                            \
-{                                                                              \
-    const cudaError_t error = call;                                            \
-    if (error != cudaSuccess)                                                  \
-    {                                                                          \
-        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
-        fprintf(stderr, "code: %d, reason: %s\n", error,                       \
-                cudaGetErrorString(error));                                    \
-        return;                                                                \
-    }                                                                          \
-}
-
-/*#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
-    if (cudaSuccess != err)
-    {
-        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
-                file, line, (int)err, cudaGetErrorString(err));
-        return;
-    }
-}*/
 
 #define BLKXSIZE2D 16
 #define BLKYSIZE2D 16
@@ -375,13 +352,13 @@ __global__ void SBResidCalc3D_kernel(float *Input1, float *Input2, float* Output
 /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
 /********************* MAIN HOST FUNCTION ******************/
 /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ)
+extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ)
 {
     int deviceCount = -1; // number of devices
     cudaGetDeviceCount(&deviceCount);
     if (deviceCount == 0) {
         fprintf(stderr, "No CUDA devices found\n");
-        return;
+        return -1;
     }
     
 	int ll, DimTotal;
@@ -569,5 +546,6 @@ extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter,
             cudaFree(By);
             cudaFree(Bz);
     } 
-    //cudaDeviceReset(); 
+    //cudaDeviceReset();
+    return 0;
 }
diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.h b/Core/regularisers_GPU/TV_SB_GPU_core.h
index bdc9219..d44ab77 100755
--- a/Core/regularisers_GPU/TV_SB_GPU_core.h
+++ b/Core/regularisers_GPU/TV_SB_GPU_core.h
@@ -5,6 +5,6 @@
 #ifndef _SB_TV_GPU_
 #define _SB_TV_GPU_
 
-extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
+extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
 
 #endif 
diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
index e2c6ecf..0bc3ff0 100644
--- a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
+++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
@@ -16,7 +16,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */ 
-
+#include "shared.h"
 #include "dTV_FGP_GPU_core.h"
 #include <thrust/device_vector.h>
 #include <thrust/transform_reduce.h>
@@ -45,30 +45,6 @@ limitations under the License.
  */
  
 
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(call)                                                            \
-{                                                                              \
-    const cudaError_t error = call;                                            \
-    if (error != cudaSuccess)                                                  \
-    {                                                                          \
-        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
-        fprintf(stderr, "code: %d, reason: %s\n", error,                       \
-                cudaGetErrorString(error));                                    \
-        return;                                                                \
-    }                                                                          \
-}
-/*#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
-    if (cudaSuccess != err)
-    {
-        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
-                file, line, (int)err, cudaGetErrorString(err));
-        return;
-    }
-}
-*/
 #define BLKXSIZE2D 16
 #define BLKYSIZE2D 16
 
@@ -479,7 +455,7 @@ __global__ void dTVnonneg3D_kernel(float* Output, int N, int M, int Z, int num_t
 /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
 
 ////////////MAIN HOST FUNCTION ///////////////
-extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
 {
     int deviceCount = -1; // number of devices
     cudaGetDeviceCount(&deviceCount);
@@ -759,6 +735,7 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f
             cudaFree(InputRef_y);
             cudaFree(InputRef_z);
             cudaFree(d_InputRef);
-    } 
-    //cudaDeviceReset(); 
+    }
+    //cudaDeviceReset();
+    return 0;
 }
diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.h b/Core/regularisers_GPU/dTV_FGP_GPU_core.h
index b906636..9020b1a 100644
--- a/Core/regularisers_GPU/dTV_FGP_GPU_core.h
+++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.h
@@ -5,6 +5,6 @@
 #ifndef _dTV_FGP_GPU_
 #define _dTV_FGP_GPU_
 
-extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
 
 #endif 
diff --git a/Core/regularisers_GPU/shared.h b/Core/regularisers_GPU/shared.h
new file mode 100644
index 0000000..fe98cd6
--- /dev/null
+++ b/Core/regularisers_GPU/shared.h
@@ -0,0 +1,42 @@
+/*shared macros*/
+
+
+/*checks CUDA call, should be used in functions returning <int> value
+if error happens, writes to standard error and explicitly returns -1*/
+#define CHECK(call)                                                            \
+{                                                                              \
+    const cudaError_t error = call;                                            \
+    if (error != cudaSuccess)                                                  \
+    {                                                                          \
+        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
+        fprintf(stderr, "code: %d, reason: %s\n", error,                       \
+                cudaGetErrorString(error));                                    \
+        return -1;                                                             \
+    }                                                                          \
+}
+
+// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
+#define checkCudaErrors(call)                                                            \
+{                                                                              \
+    const cudaError_t error = call;                                            \
+    if (error != cudaSuccess)                                                  \
+    {                                                                          \
+        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
+        fprintf(stderr, "code: %d, reason: %s\n", error,                       \
+                cudaGetErrorString(error));                                    \
+        return -1;                                                                \
+    }                                                                          \
+}
+/*#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
+
+inline void __checkCudaErrors(cudaError err, const char *file, const int line)
+{
+    if (cudaSuccess != err)
+    {
+        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
+                file, line, (int)err, cudaGetErrorString(err));
+        return;
+    }
+}
+*/
+
-- 
cgit v1.2.3