From 595546f07898a7c2369059e5217c1f13bcdb34b2 Mon Sep 17 00:00:00 2001 From: TomasKulhanek Date: Fri, 7 Dec 2018 00:46:08 +0000 Subject: return insteadd of exit --- Core/regularisers_GPU/LLT_ROF_GPU_core.cu | 4 +- Core/regularisers_GPU/NonlDiff_GPU_core.cu | 2 +- Core/regularisers_GPU/PatchSelect_GPU_core.cu | 174 +++++++++++++------------- Core/regularisers_GPU/TGV_GPU_core.cu | 2 +- Core/regularisers_GPU/TV_FGP_GPU_core.cu | 2 +- Core/regularisers_GPU/TV_ROF_GPU_core.cu | 2 +- Core/regularisers_GPU/TV_SB_GPU_core.cu | 2 +- Core/regularisers_GPU/dTV_FGP_GPU_core.cu | 2 +- 8 files changed, 95 insertions(+), 95 deletions(-) diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu index 0228bf0..ac43eb7 100644 --- a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu +++ b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu @@ -44,11 +44,11 @@ limitations under the License. { \ const cudaError_t error = call; \ if (error != cudaSuccess) \ - { \ + { / \ fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ fprintf(stderr, "code: %d, reason: %s\n", error, \ cudaGetErrorString(error)); \ - exit(1); \ + return; \ } \ } diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.cu b/Core/regularisers_GPU/NonlDiff_GPU_core.cu index 8048830..f8176eb 100644 --- a/Core/regularisers_GPU/NonlDiff_GPU_core.cu +++ b/Core/regularisers_GPU/NonlDiff_GPU_core.cu @@ -46,7 +46,7 @@ limitations under the License. fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ fprintf(stderr, "code: %d, reason: %s\n", error, \ cudaGetErrorString(error)); \ - exit(1); \ + return; \ } \ } diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/Core/regularisers_GPU/PatchSelect_GPU_core.cu index f558b0f..ba84105 100644 --- a/Core/regularisers_GPU/PatchSelect_GPU_core.cu +++ b/Core/regularisers_GPU/PatchSelect_GPU_core.cu @@ -19,7 +19,7 @@ */ #include "PatchSelect_GPU_core.h" - + /* CUDA implementation of non-local weight pre-calculation for non-local priors * Weights and associated indices are stored into pre-allocated arrays and passed * to the regulariser @@ -36,32 +36,32 @@ * 1. AR_i - indeces of i neighbours * 2. AR_j - indeces of j neighbours * 3. Weights_ij - associated weights - */ - -// This will output the proper CUDA error strings in the event that a CUDA host call returns an error -#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) - -inline void __checkCudaErrors(cudaError err, const char *file, const int line) -{ - if (cudaSuccess != err) - { - fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", - file, line, (int)err, cudaGetErrorString(err)); - exit(EXIT_FAILURE); - } -} - -#define BLKXSIZE 16 -#define BLKYSIZE 16 -#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) -#define M_PI 3.14159265358979323846 -#define EPS 1.0e-8 + */ + +// This will output the proper CUDA error strings in the event that a CUDA host call returns an error +#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) + +inline void __checkCudaErrors(cudaError err, const char *file, const int line) +{ + if (cudaSuccess != err) + { + fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", + file, line, (int)err, cudaGetErrorString(err)); + return; + } +} + +#define BLKXSIZE 16 +#define BLKYSIZE 16 +#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) +#define M_PI 3.14159265358979323846 +#define EPS 1.0e-8 #define CONSTVECSIZE5 121 #define CONSTVECSIZE7 225 #define CONSTVECSIZE9 361 #define CONSTVECSIZE11 529 #define CONSTVECSIZE13 729 - + __device__ void swap(float *xp, float *yp) { float temp = *xp; @@ -75,9 +75,9 @@ __device__ void swapUS(unsigned short *xp, unsigned short *yp) *yp = temp; } -/********************************************************************************/ -__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) -{ +/********************************************************************************/ +__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; @@ -85,10 +85,10 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne float Weight_Vec[CONSTVECSIZE5]; unsigned short ind_i[CONSTVECSIZE5]; unsigned short ind_j[CONSTVECSIZE5]; - - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + long index = i*M+j; counter = 0; @@ -139,10 +139,10 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne H_j_d[index2] = ind_j[x]; Weights_d[index2] = Weight_Vec[x]; } -} -/********************************************************************************/ -__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) -{ +} +/********************************************************************************/ +__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; @@ -150,10 +150,10 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne float Weight_Vec[CONSTVECSIZE7]; unsigned short ind_i[CONSTVECSIZE7]; unsigned short ind_j[CONSTVECSIZE7]; - - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + long index = i*M+j; counter = 0; @@ -204,9 +204,9 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne H_j_d[index2] = ind_j[x]; Weights_d[index2] = Weight_Vec[x]; } -} -__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) -{ +} +__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; @@ -214,10 +214,10 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne float Weight_Vec[CONSTVECSIZE9]; unsigned short ind_i[CONSTVECSIZE9]; unsigned short ind_j[CONSTVECSIZE9]; - - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + long index = i*M+j; counter = 0; @@ -269,8 +269,8 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne Weights_d[index2] = Weight_Vec[x]; } } -__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) -{ +__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; @@ -278,10 +278,10 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign float Weight_Vec[CONSTVECSIZE11]; unsigned short ind_i[CONSTVECSIZE11]; unsigned short ind_j[CONSTVECSIZE11]; - - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + long index = i*M+j; counter = 0; @@ -333,8 +333,8 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign Weights_d[index2] = Weight_Vec[x]; } } -__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) -{ +__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; @@ -342,10 +342,10 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign float Weight_Vec[CONSTVECSIZE13]; unsigned short ind_i[CONSTVECSIZE13]; unsigned short ind_j[CONSTVECSIZE13]; - - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + long index = i*M+j; counter = 0; @@ -398,29 +398,29 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign } } - + /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ /********************* MAIN HOST FUNCTION ******************/ -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ -extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h) -{ - int deviceCount = -1; // number of devices - cudaGetDeviceCount(&deviceCount); - if (deviceCount == 0) { - fprintf(stderr, "No CUDA devices found\n"); - return; +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ +extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h) +{ + int deviceCount = -1; // number of devices + cudaGetDeviceCount(&deviceCount); + if (deviceCount == 0) { + fprintf(stderr, "No CUDA devices found\n"); + return; } - - int SearchW_full, SimilW_full, counterG, i, j; + + int SearchW_full, SimilW_full, counterG, i, j; float *Ad, *Weights_d, h2, *Eucl_Vec, *Eucl_Vec_d; - unsigned short *H_i_d, *H_j_d; + unsigned short *H_i_d, *H_j_d; h2 = h*h; - - dim3 dimBlock(BLKXSIZE,BLKYSIZE); - dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE)); - - SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window size */ - SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1); /* the full similarity window size */ + + dim3 dimBlock(BLKXSIZE,BLKYSIZE); + dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE)); + + SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window size */ + SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1); /* the full similarity window size */ /* generate a 2D Gaussian kernel for NLM procedure */ Eucl_Vec = (float*) calloc (SimilW_full,sizeof(float)); @@ -432,16 +432,16 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho }} /*main neighb loop */ - /*allocate space on the device*/ - checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) ); + /*allocate space on the device*/ + checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) ); checkCudaErrors( cudaMalloc((void**)&H_i_d, N*M*NumNeighb*sizeof(unsigned short)) ); checkCudaErrors( cudaMalloc((void**)&H_j_d, N*M*NumNeighb*sizeof(unsigned short)) ); checkCudaErrors( cudaMalloc((void**)&Weights_d, N*M*NumNeighb*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) ); - - /* copy data from the host to the device */ + checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) ); + + /* copy data from the host to the device */ checkCudaErrors( cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice) ); - checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) ); /********************** Run CUDA kernel here ********************/ if (SearchWindow == 5) IndexSelect2D_5_kernel<<>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); @@ -450,19 +450,19 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho else if (SearchWindow == 11) IndexSelect2D_11_kernel<<>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); else if (SearchWindow == 13) IndexSelect2D_13_kernel<<>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); else { - fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n"); + fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n"); return;} - checkCudaErrors(cudaPeekAtLastError() ); - checkCudaErrors(cudaDeviceSynchronize()); - /***************************************************************/ - + checkCudaErrors(cudaPeekAtLastError() ); + checkCudaErrors(cudaDeviceSynchronize()); + /***************************************************************/ + checkCudaErrors(cudaMemcpy(H_i, H_i_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) ); checkCudaErrors(cudaMemcpy(H_j, H_j_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) ); checkCudaErrors(cudaMemcpy(Weights, Weights_d, N*M*NumNeighb*sizeof(float),cudaMemcpyDeviceToHost) ); - + cudaFree(Ad); cudaFree(H_i_d); cudaFree(H_j_d); cudaFree(Weights_d); - cudaFree(Eucl_Vec_d); + cudaFree(Eucl_Vec_d); } diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu index 3081011..09a4ec5 100644 --- a/Core/regularisers_GPU/TGV_GPU_core.cu +++ b/Core/regularisers_GPU/TGV_GPU_core.cu @@ -45,7 +45,7 @@ limitations under the License. fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ fprintf(stderr, "code: %d, reason: %s\n", error, \ cudaGetErrorString(error)); \ - exit(1); \ + return; \ } \ } diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/Core/regularisers_GPU/TV_FGP_GPU_core.cu index eab7a44..7466135 100755 --- a/Core/regularisers_GPU/TV_FGP_GPU_core.cu +++ b/Core/regularisers_GPU/TV_FGP_GPU_core.cu @@ -48,7 +48,7 @@ inline void __checkCudaErrors(cudaError err, const char *file, const int line) { fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", file, line, (int)err, cudaGetErrorString(err)); - exit(EXIT_FAILURE); + return; } } diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/Core/regularisers_GPU/TV_ROF_GPU_core.cu index 57de63d..5ae3b6e 100755 --- a/Core/regularisers_GPU/TV_ROF_GPU_core.cu +++ b/Core/regularisers_GPU/TV_ROF_GPU_core.cu @@ -44,7 +44,7 @@ limitations under the License. fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ fprintf(stderr, "code: %d, reason: %s\n", error, \ cudaGetErrorString(error)); \ - exit(1); \ + return; \ } \ } diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.cu b/Core/regularisers_GPU/TV_SB_GPU_core.cu index 68b9221..a97851c 100755 --- a/Core/regularisers_GPU/TV_SB_GPU_core.cu +++ b/Core/regularisers_GPU/TV_SB_GPU_core.cu @@ -47,7 +47,7 @@ inline void __checkCudaErrors(cudaError err, const char *file, const int line) { fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", file, line, (int)err, cudaGetErrorString(err)); - exit(EXIT_FAILURE); + return; } } diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu index 80a78da..6040648 100644 --- a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu +++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu @@ -54,7 +54,7 @@ inline void __checkCudaErrors(cudaError err, const char *file, const int line) { fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", file, line, (int)err, cudaGetErrorString(err)); - exit(EXIT_FAILURE); + return; } } -- cgit v1.2.3