diff options
| -rw-r--r-- | Core/regularisers_GPU/LLT_ROF_GPU_core.cu | 4 | ||||
| -rw-r--r-- | Core/regularisers_GPU/NonlDiff_GPU_core.cu | 2 | ||||
| -rw-r--r-- | Core/regularisers_GPU/PatchSelect_GPU_core.cu | 174 | ||||
| -rw-r--r-- | Core/regularisers_GPU/TGV_GPU_core.cu | 2 | ||||
| -rwxr-xr-x | Core/regularisers_GPU/TV_FGP_GPU_core.cu | 2 | ||||
| -rwxr-xr-x | Core/regularisers_GPU/TV_ROF_GPU_core.cu | 2 | ||||
| -rwxr-xr-x | Core/regularisers_GPU/TV_SB_GPU_core.cu | 2 | ||||
| -rw-r--r-- | Core/regularisers_GPU/dTV_FGP_GPU_core.cu | 2 | 
8 files changed, 95 insertions, 95 deletions
| diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu index 0228bf0..ac43eb7 100644 --- a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu +++ b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu @@ -44,11 +44,11 @@ limitations under the License.  {                                                                              \      const cudaError_t error = call;                                            \      if (error != cudaSuccess)                                                  \ -    {                                                                          \ +    {                                 /                                         \          fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \          fprintf(stderr, "code: %d, reason: %s\n", error,                       \                  cudaGetErrorString(error));                                    \ -        exit(1);                                                               \ +        return;                                                               \      }                                                                          \  } diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.cu b/Core/regularisers_GPU/NonlDiff_GPU_core.cu index 8048830..f8176eb 100644 --- a/Core/regularisers_GPU/NonlDiff_GPU_core.cu +++ b/Core/regularisers_GPU/NonlDiff_GPU_core.cu @@ -46,7 +46,7 @@ limitations under the License.          fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \          fprintf(stderr, "code: %d, reason: %s\n", error,                       \                  cudaGetErrorString(error));                                    \ -        exit(1);                                                               \ +        return;                                                               \      }                                                                          \  } diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/Core/regularisers_GPU/PatchSelect_GPU_core.cu index f558b0f..ba84105 100644 --- a/Core/regularisers_GPU/PatchSelect_GPU_core.cu +++ b/Core/regularisers_GPU/PatchSelect_GPU_core.cu @@ -19,7 +19,7 @@   */  #include "PatchSelect_GPU_core.h" -
 +  /* CUDA implementation of non-local weight pre-calculation for non-local priors   * Weights and associated indices are stored into pre-allocated arrays and passed   * to the regulariser @@ -36,32 +36,32 @@   * 1. AR_i - indeces of i neighbours   * 2. AR_j - indeces of j neighbours   * 3. Weights_ij - associated weights - */
 -
 -// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
 -#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
 -
 -inline void __checkCudaErrors(cudaError err, const char *file, const int line)
 -{
 -    if (cudaSuccess != err)
 -    {
 -        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
 -                file, line, (int)err, cudaGetErrorString(err));
 -        exit(EXIT_FAILURE);
 -    }
 -}
 -
 -#define BLKXSIZE 16
 -#define BLKYSIZE 16
 -#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
 -#define M_PI 3.14159265358979323846
 -#define EPS 1.0e-8
 + */ + +// This will output the proper CUDA error strings in the event that a CUDA host call returns an error +#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__) + +inline void __checkCudaErrors(cudaError err, const char *file, const int line) +{ +    if (cudaSuccess != err) +    { +        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", +                file, line, (int)err, cudaGetErrorString(err)); +        return; +    } +} + +#define BLKXSIZE 16 +#define BLKYSIZE 16 +#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) +#define M_PI 3.14159265358979323846 +#define EPS 1.0e-8  #define CONSTVECSIZE5 121  #define CONSTVECSIZE7 225  #define CONSTVECSIZE9 361  #define CONSTVECSIZE11 529  #define CONSTVECSIZE13 729 -
 +  __device__ void swap(float *xp, float *yp)   {      float temp = *xp;  @@ -75,9 +75,9 @@ __device__ void swapUS(unsigned short *xp, unsigned short *yp)      *yp = temp;   } -/********************************************************************************/
 -__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
 -{          
 +/********************************************************************************/ +__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{                long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;      float normsum; @@ -85,10 +85,10 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne      float Weight_Vec[CONSTVECSIZE5];      unsigned short ind_i[CONSTVECSIZE5];      unsigned short ind_j[CONSTVECSIZE5]; -
 -    int i = blockDim.x * blockIdx.x + threadIdx.x;
 -    int j = blockDim.y * blockIdx.y + threadIdx.y;
 -    
 + +    int i = blockDim.x * blockIdx.x + threadIdx.x; +    int j = blockDim.y * blockIdx.y + threadIdx.y; +          long index = i*M+j;            counter = 0; @@ -139,10 +139,10 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne          H_j_d[index2] = ind_j[x];          Weights_d[index2] = Weight_Vec[x];      } -} 
 -/********************************************************************************/
 -__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
 -{          
 +}  +/********************************************************************************/ +__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{                long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;      float normsum; @@ -150,10 +150,10 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne      float Weight_Vec[CONSTVECSIZE7];      unsigned short ind_i[CONSTVECSIZE7];      unsigned short ind_j[CONSTVECSIZE7]; -
 -    int i = blockDim.x * blockIdx.x + threadIdx.x;
 -    int j = blockDim.y * blockIdx.y + threadIdx.y;
 -    
 + +    int i = blockDim.x * blockIdx.x + threadIdx.x; +    int j = blockDim.y * blockIdx.y + threadIdx.y; +          long index = i*M+j;            counter = 0; @@ -204,9 +204,9 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne          H_j_d[index2] = ind_j[x];          Weights_d[index2] = Weight_Vec[x];      } -}
 -__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
 -{          
 +} +__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{                long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;      float normsum; @@ -214,10 +214,10 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne      float Weight_Vec[CONSTVECSIZE9];      unsigned short ind_i[CONSTVECSIZE9];      unsigned short ind_j[CONSTVECSIZE9]; -
 -    int i = blockDim.x * blockIdx.x + threadIdx.x;
 -    int j = blockDim.y * blockIdx.y + threadIdx.y;
 -    
 + +    int i = blockDim.x * blockIdx.x + threadIdx.x; +    int j = blockDim.y * blockIdx.y + threadIdx.y; +          long index = i*M+j;            counter = 0; @@ -269,8 +269,8 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne          Weights_d[index2] = Weight_Vec[x];      }                       } -__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
 -{          
 +__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{                long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;      float normsum; @@ -278,10 +278,10 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign      float Weight_Vec[CONSTVECSIZE11];      unsigned short ind_i[CONSTVECSIZE11];      unsigned short ind_j[CONSTVECSIZE11]; -
 -    int i = blockDim.x * blockIdx.x + threadIdx.x;
 -    int j = blockDim.y * blockIdx.y + threadIdx.y;
 -    
 + +    int i = blockDim.x * blockIdx.x + threadIdx.x; +    int j = blockDim.y * blockIdx.y + threadIdx.y; +          long index = i*M+j;            counter = 0; @@ -333,8 +333,8 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign          Weights_d[index2] = Weight_Vec[x];      }  }  -__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
 -{          
 +__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{                long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;      float normsum; @@ -342,10 +342,10 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign      float Weight_Vec[CONSTVECSIZE13];      unsigned short ind_i[CONSTVECSIZE13];      unsigned short ind_j[CONSTVECSIZE13]; -
 -    int i = blockDim.x * blockIdx.x + threadIdx.x;
 -    int j = blockDim.y * blockIdx.y + threadIdx.y;
 -    
 + +    int i = blockDim.x * blockIdx.x + threadIdx.x; +    int j = blockDim.y * blockIdx.y + threadIdx.y; +          long index = i*M+j;            counter = 0; @@ -398,29 +398,29 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign      }  }  -   
 +     /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/  /********************* MAIN HOST FUNCTION ******************/ -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
 -extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
 -{
 -    int deviceCount = -1; // number of devices
 -    cudaGetDeviceCount(&deviceCount);
 -    if (deviceCount == 0) {
 -        fprintf(stderr, "No CUDA devices found\n");
 -        return;
 +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ +extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h) +{ +    int deviceCount = -1; // number of devices +    cudaGetDeviceCount(&deviceCount); +    if (deviceCount == 0) { +        fprintf(stderr, "No CUDA devices found\n"); +        return;      }   -      
 -    int SearchW_full, SimilW_full, counterG, i, j;
 +       +    int SearchW_full, SimilW_full, counterG, i, j;      float *Ad, *Weights_d, h2, *Eucl_Vec, *Eucl_Vec_d;     -    unsigned short *H_i_d, *H_j_d;
 +    unsigned short *H_i_d, *H_j_d;      h2 = h*h; -    
 -    dim3 dimBlock(BLKXSIZE,BLKYSIZE);
 -    dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));    
 -       
 -    SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window  size */
 -    SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1);   /* the full similarity window  size */
 +     +    dim3 dimBlock(BLKXSIZE,BLKYSIZE); +    dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));     +        +    SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window  size */ +    SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1);   /* the full similarity window  size */      /* generate a 2D Gaussian kernel for NLM procedure */      Eucl_Vec = (float*) calloc (SimilW_full,sizeof(float)); @@ -432,16 +432,16 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho      }} /*main neighb loop */ -    /*allocate space on the device*/
 -    checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) );
 +    /*allocate space on the device*/ +    checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) );      checkCudaErrors( cudaMalloc((void**)&H_i_d, N*M*NumNeighb*sizeof(unsigned short)) );      checkCudaErrors( cudaMalloc((void**)&H_j_d, N*M*NumNeighb*sizeof(unsigned short)) );      checkCudaErrors( cudaMalloc((void**)&Weights_d, N*M*NumNeighb*sizeof(float)) ); -    checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) );
 -
 -    /* copy data from the host to the device */
 +    checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) ); + +    /* copy data from the host to the device */      checkCudaErrors( cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice) ); -    checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );    
 +    checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );          /********************** Run CUDA kernel here ********************/      if (SearchWindow == 5)  IndexSelect2D_5_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); @@ -450,19 +450,19 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho      else if (SearchWindow == 11)  IndexSelect2D_11_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);      else if (SearchWindow == 13)  IndexSelect2D_13_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);      else { -    fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n");
 +    fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n");          return;}     -    checkCudaErrors(cudaPeekAtLastError() );        
 -    checkCudaErrors(cudaDeviceSynchronize());   
 -    /***************************************************************/    
 -        
 +    checkCudaErrors(cudaPeekAtLastError() );         +    checkCudaErrors(cudaDeviceSynchronize());    +    /***************************************************************/     +              checkCudaErrors(cudaMemcpy(H_i, H_i_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );      checkCudaErrors(cudaMemcpy(H_j, H_j_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );        checkCudaErrors(cudaMemcpy(Weights, Weights_d, N*M*NumNeighb*sizeof(float),cudaMemcpyDeviceToHost) );    -    
 +          cudaFree(Ad);       cudaFree(H_i_d);       cudaFree(H_j_d);          cudaFree(Weights_d);     -    cudaFree(Eucl_Vec_d);
 +    cudaFree(Eucl_Vec_d);  } diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu index 3081011..09a4ec5 100644 --- a/Core/regularisers_GPU/TGV_GPU_core.cu +++ b/Core/regularisers_GPU/TGV_GPU_core.cu @@ -45,7 +45,7 @@ limitations under the License.          fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \          fprintf(stderr, "code: %d, reason: %s\n", error,                       \                  cudaGetErrorString(error));                                    \ -        exit(1);                                                               \ +        return;                                                               \      }                                                                          \  } diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/Core/regularisers_GPU/TV_FGP_GPU_core.cu index eab7a44..7466135 100755 --- a/Core/regularisers_GPU/TV_FGP_GPU_core.cu +++ b/Core/regularisers_GPU/TV_FGP_GPU_core.cu @@ -48,7 +48,7 @@ inline void __checkCudaErrors(cudaError err, const char *file, const int line)      {          fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",                  file, line, (int)err, cudaGetErrorString(err)); -        exit(EXIT_FAILURE); +        return;      }  } diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/Core/regularisers_GPU/TV_ROF_GPU_core.cu index 57de63d..5ae3b6e 100755 --- a/Core/regularisers_GPU/TV_ROF_GPU_core.cu +++ b/Core/regularisers_GPU/TV_ROF_GPU_core.cu @@ -44,7 +44,7 @@ limitations under the License.          fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \          fprintf(stderr, "code: %d, reason: %s\n", error,                       \                  cudaGetErrorString(error));                                    \ -        exit(1);                                                               \ +        return;                                                             \      }                                                                          \  } diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.cu b/Core/regularisers_GPU/TV_SB_GPU_core.cu index 68b9221..a97851c 100755 --- a/Core/regularisers_GPU/TV_SB_GPU_core.cu +++ b/Core/regularisers_GPU/TV_SB_GPU_core.cu @@ -47,7 +47,7 @@ inline void __checkCudaErrors(cudaError err, const char *file, const int line)      {          fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",                  file, line, (int)err, cudaGetErrorString(err)); -        exit(EXIT_FAILURE); +        return;      }  } diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu index 80a78da..6040648 100644 --- a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu +++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu @@ -54,7 +54,7 @@ inline void __checkCudaErrors(cudaError err, const char *file, const int line)      {          fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",                  file, line, (int)err, cudaGetErrorString(err)); -        exit(EXIT_FAILURE); +        return;      }  } | 
