From 3c0c441a9a6b24a02e10db8c8eda14bb8b3f2150 Mon Sep 17 00:00:00 2001 From: TomasKulhanek Date: Mon, 17 Dec 2018 09:21:36 +0000 Subject: UPDATE: shared CHECK cuda macros and return int --- Core/regularisers_GPU/Diffus_4thO_GPU_core.cu | 16 ++-------- Core/regularisers_GPU/Diffus_4thO_GPU_core.h | 2 +- Core/regularisers_GPU/LLT_ROF_GPU_core.cu | 16 ++-------- Core/regularisers_GPU/LLT_ROF_GPU_core.h | 2 +- Core/regularisers_GPU/NonlDiff_GPU_core.cu | 19 ++++-------- Core/regularisers_GPU/NonlDiff_GPU_core.h | 2 +- Core/regularisers_GPU/PatchSelect_GPU_core.cu | 28 ++---------------- Core/regularisers_GPU/PatchSelect_GPU_core.h | 2 +- Core/regularisers_GPU/TGV_GPU_core.cu | 17 ++--------- Core/regularisers_GPU/TGV_GPU_core.h | 2 +- Core/regularisers_GPU/TV_FGP_GPU_core.cu | 32 ++++---------------- Core/regularisers_GPU/TV_FGP_GPU_core.h | 2 +- Core/regularisers_GPU/TV_ROF_GPU_core.cu | 18 +++--------- Core/regularisers_GPU/TV_ROF_GPU_core.h | 2 +- Core/regularisers_GPU/TV_SB_GPU_core.cu | 30 +++---------------- Core/regularisers_GPU/TV_SB_GPU_core.h | 2 +- Core/regularisers_GPU/dTV_FGP_GPU_core.cu | 33 ++++----------------- Core/regularisers_GPU/dTV_FGP_GPU_core.h | 2 +- Core/regularisers_GPU/shared.h | 42 +++++++++++++++++++++++++++ 19 files changed, 86 insertions(+), 183 deletions(-) create mode 100644 Core/regularisers_GPU/shared.h diff --git a/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu b/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu index 287fdc8..a4dbe70 100644 --- a/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu +++ b/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License. */ #include "Diffus_4thO_GPU_core.h" +#include "shared.h" /* CUDA implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case) * The minimisation is performed using explicit scheme. @@ -36,18 +37,6 @@ limitations under the License. * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191. */ -#define CHECK(call) \ -{ \ - const cudaError_t error = call; \ - if (error != cudaSuccess) \ - { \ - fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ - fprintf(stderr, "code: %d, reason: %s\n", error, \ - cudaGetErrorString(error)); \ - return; \ - } \ -} - #define BLKXSIZE 8 #define BLKYSIZE 8 #define BLKZSIZE 8 @@ -228,7 +217,7 @@ __global__ void Diffusion_update_step3D_kernel(float *Output, float *Input, floa /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ /********************* MAIN HOST FUNCTION ******************/ /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ -extern "C" void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z) +extern "C" int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z) { int dimTotal, dev = 0; CHECK(cudaSetDevice(dev)); @@ -275,4 +264,5 @@ extern "C" void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, CHECK(cudaFree(d_input)); CHECK(cudaFree(d_output)); CHECK(cudaFree(d_W_Lapl)); + return 0; } diff --git a/Core/regularisers_GPU/Diffus_4thO_GPU_core.h b/Core/regularisers_GPU/Diffus_4thO_GPU_core.h index 6314c1f..77d5d79 100644 --- a/Core/regularisers_GPU/Diffus_4thO_GPU_core.h +++ b/Core/regularisers_GPU/Diffus_4thO_GPU_core.h @@ -3,6 +3,6 @@ #include "CCPiDefines.h" #include -extern "C" CCPI_EXPORT void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z); +extern "C" CCPI_EXPORT int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z); #endif diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu index 3e41d64..87871be 100644 --- a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu +++ b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License. */ #include "LLT_ROF_GPU_core.h" +#include "shared.h" /* CUDA implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty. * @@ -40,18 +41,6 @@ limitations under the License. * [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms" */ -#define CHECK(call) \ -{ \ - const cudaError_t error = call; \ - if (error != cudaSuccess) \ - { \ - fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ - fprintf(stderr, "code: %d, reason: %s\n", error, \ - cudaGetErrorString(error)); \ - return; \ - } \ -} - #define BLKXSIZE 8 #define BLKYSIZE 8 #define BLKZSIZE 8 @@ -403,7 +392,7 @@ __global__ void Update3D_LLT_ROF_kernel(float *U0, float *U, float *D1_LLT, floa /************************ HOST FUNCTION ****************************/ /*******************************************************************/ -extern "C" void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z) +extern "C" int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z) { // set up device int dev = 0; @@ -480,4 +469,5 @@ extern "C" void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, f CHECK(cudaFree(D1_ROF)); CHECK(cudaFree(D2_ROF)); CHECK(cudaFree(D3_ROF)); + return 0; } diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.h b/Core/regularisers_GPU/LLT_ROF_GPU_core.h index 4a19d09..a6bfcc7 100644 --- a/Core/regularisers_GPU/LLT_ROF_GPU_core.h +++ b/Core/regularisers_GPU/LLT_ROF_GPU_core.h @@ -3,6 +3,6 @@ #include "CCPiDefines.h" #include -extern "C" CCPI_EXPORT void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z); +extern "C" CCPI_EXPORT int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z); #endif diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.cu b/Core/regularisers_GPU/NonlDiff_GPU_core.cu index f8176eb..ff7ce4d 100644 --- a/Core/regularisers_GPU/NonlDiff_GPU_core.cu +++ b/Core/regularisers_GPU/NonlDiff_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License. */ #include "NonlDiff_GPU_core.h" +#include "shared.h" /* CUDA implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case) * The minimisation is performed using explicit scheme. @@ -38,18 +39,7 @@ limitations under the License. * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432. */ -#define CHECK(call) \ -{ \ - const cudaError_t error = call; \ - if (error != cudaSuccess) \ - { \ - fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ - fprintf(stderr, "code: %d, reason: %s\n", error, \ - cudaGetErrorString(error)); \ - return; \ - } \ -} - + #define BLKXSIZE 8 #define BLKYSIZE 8 #define BLKZSIZE 8 @@ -295,7 +285,7 @@ __global__ void NonLinearDiff3D_kernel(float *Input, float *Output, float lambda ///////////////////////////////////////////////// // HOST FUNCTION -extern "C" void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z) +extern "C" int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z) { // set up device int dev = 0; @@ -350,5 +340,6 @@ extern "C" void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, CHECK(cudaMemcpy(Output,d_output,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost)); CHECK(cudaFree(d_input)); CHECK(cudaFree(d_output)); - //cudaDeviceReset(); + //cudaDeviceReset(); + return 0; } diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.h b/Core/regularisers_GPU/NonlDiff_GPU_core.h index afd712b..5fe457e 100644 --- a/Core/regularisers_GPU/NonlDiff_GPU_core.h +++ b/Core/regularisers_GPU/NonlDiff_GPU_core.h @@ -3,6 +3,6 @@ #include "CCPiDefines.h" #include -extern "C" CCPI_EXPORT void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z); +extern "C" CCPI_EXPORT int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z); #endif diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/Core/regularisers_GPU/PatchSelect_GPU_core.cu index 28d0385..74f59ca 100644 --- a/Core/regularisers_GPU/PatchSelect_GPU_core.cu +++ b/Core/regularisers_GPU/PatchSelect_GPU_core.cu @@ -19,6 +19,7 @@ */ #include "PatchSelect_GPU_core.h" +#include "shared.h" /* CUDA implementation of non-local weight pre-calculation for non-local priors * Weights and associated indices are stored into pre-allocated arrays and passed @@ -38,30 +39,6 @@ * 3. Weights_ij - associated weights */ -// This will output the proper CUDA error strings in the event that a CUDA host call returns an error -#define checkCudaErrors(call) \ -{ \ - const cudaError_t error = call; \ - if (error != cudaSuccess) \ - { \ - fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ - fprintf(stderr, "code: %d, reason: %s\n", error, \ - cudaGetErrorString(error)); \ - return; \ - } \ -} - -/*#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) - -inline void __checkCudaErrors(cudaError err, const char *file, const int line) -{ - if (cudaSuccess != err) - { - fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", - file, line, (int)err, cudaGetErrorString(err)); - return; - } -}*/ #define BLKXSIZE 16 #define BLKYSIZE 16 @@ -414,7 +391,7 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ /********************* MAIN HOST FUNCTION ******************/ /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ -extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h) +extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h) { int deviceCount = -1; // number of devices cudaGetDeviceCount(&deviceCount); @@ -477,4 +454,5 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho cudaFree(H_j_d); cudaFree(Weights_d); cudaFree(Eucl_Vec_d); + return 0; } diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.h b/Core/regularisers_GPU/PatchSelect_GPU_core.h index d20fe9f..8c124d3 100644 --- a/Core/regularisers_GPU/PatchSelect_GPU_core.h +++ b/Core/regularisers_GPU/PatchSelect_GPU_core.h @@ -3,6 +3,6 @@ #include "CCPiDefines.h" #include -extern "C" CCPI_EXPORT void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h); +extern "C" CCPI_EXPORT int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h); #endif diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu index 09a4ec5..73232a9 100644 --- a/Core/regularisers_GPU/TGV_GPU_core.cu +++ b/Core/regularisers_GPU/TGV_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License. */ #include "TGV_GPU_core.h" +#include "shared.h" /* CUDA implementation of Primal-Dual denoising method for * Total Generilized Variation (TGV)-L2 model [1] (2D case only) @@ -36,19 +37,6 @@ limitations under the License. * References: * [1] K. Bredies "Total Generalized Variation" */ - -#define CHECK(call) \ -{ \ - const cudaError_t error = call; \ - if (error != cudaSuccess) \ - { \ - fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ - fprintf(stderr, "code: %d, reason: %s\n", error, \ - cudaGetErrorString(error)); \ - return; \ - } \ -} - #define BLKXSIZE2D 16 #define BLKYSIZE2D 16 @@ -239,7 +227,7 @@ __global__ void newU_kernel(float *U, float *U_old, int N, int M, int num_total) /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ /********************* MAIN HOST FUNCTION ******************/ /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ -extern "C" void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY) +extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY) { int dimTotal, dev = 0; CHECK(cudaSetDevice(dev)); @@ -320,4 +308,5 @@ extern "C" void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, fl CHECK(cudaFree(V2)); CHECK(cudaFree(V1_old)); CHECK(cudaFree(V2_old)); + return 0; } diff --git a/Core/regularisers_GPU/TGV_GPU_core.h b/Core/regularisers_GPU/TGV_GPU_core.h index 663378f..5a4eb76 100644 --- a/Core/regularisers_GPU/TGV_GPU_core.h +++ b/Core/regularisers_GPU/TGV_GPU_core.h @@ -3,6 +3,6 @@ #include "CCPiDefines.h" #include -extern "C" CCPI_EXPORT void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY); +extern "C" CCPI_EXPORT int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY); #endif diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/Core/regularisers_GPU/TV_FGP_GPU_core.cu index bde3afb..b371c5d 100755 --- a/Core/regularisers_GPU/TV_FGP_GPU_core.cu +++ b/Core/regularisers_GPU/TV_FGP_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License. */ #include "TV_FGP_GPU_core.h" +#include "shared.h" #include #include @@ -39,30 +40,6 @@ limitations under the License. * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" */ -// This will output the proper CUDA error strings in the event that a CUDA host call returns an error -#define checkCudaErrors(call) \ -{ \ - const cudaError_t error = call; \ - if (error != cudaSuccess) \ - { \ - fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ - fprintf(stderr, "code: %d, reason: %s\n", error, \ - cudaGetErrorString(error)); \ - return; \ - } \ -} - -/*#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) - -inline void __checkCudaErrors(cudaError err, const char *file, const int line) -{ - if (cudaSuccess != err) - { - fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", - file, line, (int)err, cudaGetErrorString(err)); - return; - } -}*/ #define BLKXSIZE2D 16 #define BLKYSIZE2D 16 @@ -366,13 +343,13 @@ __global__ void FGPResidCalc3D_kernel(float *Input1, float *Input2, float* Outpu /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ ////////////MAIN HOST FUNCTION /////////////// -extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) { int deviceCount = -1; // number of devices cudaGetDeviceCount(&deviceCount); if (deviceCount == 0) { fprintf(stderr, "No CUDA devices found\n"); - return; + return -1; } int count = 0, i; @@ -582,5 +559,6 @@ extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, in cudaFree(R2); cudaFree(R3); } - //cudaDeviceReset(); + //cudaDeviceReset(); + return 0; } diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.h b/Core/regularisers_GPU/TV_FGP_GPU_core.h index 107d243..b28cdf3 100755 --- a/Core/regularisers_GPU/TV_FGP_GPU_core.h +++ b/Core/regularisers_GPU/TV_FGP_GPU_core.h @@ -5,6 +5,6 @@ #ifndef _TV_FGP_GPU_ #define _TV_FGP_GPU_ -extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); +extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); #endif diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/Core/regularisers_GPU/TV_ROF_GPU_core.cu index 5ae3b6e..76f5be9 100755 --- a/Core/regularisers_GPU/TV_ROF_GPU_core.cu +++ b/Core/regularisers_GPU/TV_ROF_GPU_core.cu @@ -35,18 +35,7 @@ limitations under the License. * * D. Kazantsev, 2016-18 */ - -#define CHECK(call) \ -{ \ - const cudaError_t error = call; \ - if (error != cudaSuccess) \ - { \ - fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ - fprintf(stderr, "code: %d, reason: %s\n", error, \ - cudaGetErrorString(error)); \ - return; \ - } \ -} +#include "shared.h" #define BLKXSIZE 8 #define BLKYSIZE 8 @@ -304,7 +293,7 @@ __host__ __device__ int sign (float x) ///////////////////////////////////////////////// // HOST FUNCTION -extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z) +extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z) { // set up device int dev = 0; @@ -364,5 +353,6 @@ extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, in CHECK(cudaFree(d_update)); CHECK(cudaFree(d_D1)); CHECK(cudaFree(d_D2)); - //cudaDeviceReset(); + //cudaDeviceReset(); + return 0; } diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.h b/Core/regularisers_GPU/TV_ROF_GPU_core.h index d772aba..3a09296 100755 --- a/Core/regularisers_GPU/TV_ROF_GPU_core.h +++ b/Core/regularisers_GPU/TV_ROF_GPU_core.h @@ -3,6 +3,6 @@ #include "CCPiDefines.h" #include -extern "C" CCPI_EXPORT void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); +extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); #endif diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.cu b/Core/regularisers_GPU/TV_SB_GPU_core.cu index a590981..8c66323 100755 --- a/Core/regularisers_GPU/TV_SB_GPU_core.cu +++ b/Core/regularisers_GPU/TV_SB_GPU_core.cu @@ -39,29 +39,6 @@ limitations under the License. */ // This will output the proper CUDA error strings in the event that a CUDA host call returns an error -#define checkCudaErrors(call) \ -{ \ - const cudaError_t error = call; \ - if (error != cudaSuccess) \ - { \ - fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ - fprintf(stderr, "code: %d, reason: %s\n", error, \ - cudaGetErrorString(error)); \ - return; \ - } \ -} - -/*#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) - -inline void __checkCudaErrors(cudaError err, const char *file, const int line) -{ - if (cudaSuccess != err) - { - fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", - file, line, (int)err, cudaGetErrorString(err)); - return; - } -}*/ #define BLKXSIZE2D 16 #define BLKYSIZE2D 16 @@ -375,13 +352,13 @@ __global__ void SBResidCalc3D_kernel(float *Input1, float *Input2, float* Output /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ /********************* MAIN HOST FUNCTION ******************/ /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ -extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ) +extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ) { int deviceCount = -1; // number of devices cudaGetDeviceCount(&deviceCount); if (deviceCount == 0) { fprintf(stderr, "No CUDA devices found\n"); - return; + return -1; } int ll, DimTotal; @@ -569,5 +546,6 @@ extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, cudaFree(By); cudaFree(Bz); } - //cudaDeviceReset(); + //cudaDeviceReset(); + return 0; } diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.h b/Core/regularisers_GPU/TV_SB_GPU_core.h index bdc9219..d44ab77 100755 --- a/Core/regularisers_GPU/TV_SB_GPU_core.h +++ b/Core/regularisers_GPU/TV_SB_GPU_core.h @@ -5,6 +5,6 @@ #ifndef _SB_TV_GPU_ #define _SB_TV_GPU_ -extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ); +extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ); #endif diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu index e2c6ecf..0bc3ff0 100644 --- a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu +++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu @@ -16,7 +16,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - +#include "shared.h" #include "dTV_FGP_GPU_core.h" #include #include @@ -45,30 +45,6 @@ limitations under the License. */ -// This will output the proper CUDA error strings in the event that a CUDA host call returns an error -#define checkCudaErrors(call) \ -{ \ - const cudaError_t error = call; \ - if (error != cudaSuccess) \ - { \ - fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ - fprintf(stderr, "code: %d, reason: %s\n", error, \ - cudaGetErrorString(error)); \ - return; \ - } \ -} -/*#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) - -inline void __checkCudaErrors(cudaError err, const char *file, const int line) -{ - if (cudaSuccess != err) - { - fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", - file, line, (int)err, cudaGetErrorString(err)); - return; - } -} -*/ #define BLKXSIZE2D 16 #define BLKYSIZE2D 16 @@ -479,7 +455,7 @@ __global__ void dTVnonneg3D_kernel(float* Output, int N, int M, int Z, int num_t /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ ////////////MAIN HOST FUNCTION /////////////// -extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) { int deviceCount = -1; // number of devices cudaGetDeviceCount(&deviceCount); @@ -759,6 +735,7 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f cudaFree(InputRef_y); cudaFree(InputRef_z); cudaFree(d_InputRef); - } - //cudaDeviceReset(); + } + //cudaDeviceReset(); + return 0; } diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.h b/Core/regularisers_GPU/dTV_FGP_GPU_core.h index b906636..9020b1a 100644 --- a/Core/regularisers_GPU/dTV_FGP_GPU_core.h +++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.h @@ -5,6 +5,6 @@ #ifndef _dTV_FGP_GPU_ #define _dTV_FGP_GPU_ -extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); +extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); #endif diff --git a/Core/regularisers_GPU/shared.h b/Core/regularisers_GPU/shared.h new file mode 100644 index 0000000..fe98cd6 --- /dev/null +++ b/Core/regularisers_GPU/shared.h @@ -0,0 +1,42 @@ +/*shared macros*/ + + +/*checks CUDA call, should be used in functions returning value +if error happens, writes to standard error and explicitly returns -1*/ +#define CHECK(call) \ +{ \ + const cudaError_t error = call; \ + if (error != cudaSuccess) \ + { \ + fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ + fprintf(stderr, "code: %d, reason: %s\n", error, \ + cudaGetErrorString(error)); \ + return -1; \ + } \ +} + +// This will output the proper CUDA error strings in the event that a CUDA host call returns an error +#define checkCudaErrors(call) \ +{ \ + const cudaError_t error = call; \ + if (error != cudaSuccess) \ + { \ + fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ + fprintf(stderr, "code: %d, reason: %s\n", error, \ + cudaGetErrorString(error)); \ + return -1; \ + } \ +} +/*#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) + +inline void __checkCudaErrors(cudaError err, const char *file, const int line) +{ + if (cudaSuccess != err) + { + fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", + file, line, (int)err, cudaGetErrorString(err)); + return; + } +} +*/ + -- cgit v1.2.3