diff options
21 files changed, 1732 insertions, 111 deletions
diff --git a/Core/CMakeLists.txt b/Core/CMakeLists.txt index 26912b9..fecdeb1 100644 --- a/Core/CMakeLists.txt +++ b/Core/CMakeLists.txt @@ -84,9 +84,9 @@ message("Adding regularisers as a shared library") #set(CMAKE_C_FLAGS "-acc -Minfo -ta=multicore -openmp -fPIC") add_library(cilreg SHARED ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/FGP_TV_core.c + ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/SB_TV_core.c #${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/LLT_model_core.c #${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/PatchBased_Regul_core.c - #${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/SplitBregman_TV_core.c #${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/TGV_PD_core.c ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/ROF_TV_core.c ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/FGP_dTV_core.c @@ -130,6 +130,7 @@ if (CUDA_FOUND) CUDA_ADD_LIBRARY(cilregcuda SHARED ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_ROF_GPU_core.cu ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_FGP_GPU_core.cu + ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_SB_GPU_core.cu ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/dTV_FGP_GPU_core.cu ) if (UNIX) diff --git a/Core/regularisers_CPU/SB_TV_core.c b/Core/regularisers_CPU/SB_TV_core.c new file mode 100755 index 0000000..93b4c2c --- /dev/null +++ b/Core/regularisers_CPU/SB_TV_core.c @@ -0,0 +1,368 @@ +/* +This work is part of the Core Imaging Library developed by +Visual Analytics and Imaging System Group of the Science Technology +Facilities Council, STFC + +Copyright 2017 Daniil Kazantsev +Copyright 2017 Srikanth Nagella, Edoardo Pasca + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "SB_TV_core.h" + +/* C-OMP implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1] +* +* Input Parameters: +* 1. Noisy image/volume +* 2. lambda - regularisation parameter +* 3. Number of iterations [OPTIONAL parameter] +* 4. eplsilon - tolerance constant [OPTIONAL parameter] +* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] +* 6. print information: 0 (off) or 1 (on) [OPTIONAL parameter] +* +* Output: +* 1. Filtered/regularized image +* +* This function is based on the Matlab's code and paper by +* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343. +*/ + +float SB_TV_CPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ) +{ + int ll, j, DimTotal; + float re, re1, lambda; + int count = 0; + mu = 1.0f/mu; + lambda = 2.0f*mu; + + if (dimZ <= 1) { + /* 2D case */ + float *Output_prev=NULL, *Dx=NULL, *Dy=NULL, *Bx=NULL, *By=NULL; + DimTotal = dimX*dimY; + + Output_prev = calloc(DimTotal, sizeof(float)); + Dx = calloc(DimTotal, sizeof(float)); + Dy = calloc(DimTotal, sizeof(float)); + Bx = calloc(DimTotal, sizeof(float)); + By = calloc(DimTotal, sizeof(float)); + + copyIm(Input, Output, dimX, dimY, 1); /*initialize */ + + /* begin outer SB iterations */ + for(ll=0; ll<iter; ll++) { + + /* storing old estimate */ + copyIm(Output, Output_prev, dimX, dimY, 1); + + /* perform two GS iterations (normally 2 is enough for the convergence) */ + gauss_seidel2D(Output, Input, Output_prev, Dx, Dy, Bx, By, dimX, dimY, lambda, mu); + copyIm(Output, Output_prev, dimX, dimY, 1); + /*GS iteration */ + gauss_seidel2D(Output, Input, Output_prev, Dx, Dy, Bx, By, dimX, dimY, lambda, mu); + + /* TV-related step */ + if (methodTV == 1) updDxDy_shrinkAniso2D(Output, Dx, Dy, Bx, By, dimX, dimY, lambda); + else updDxDy_shrinkIso2D(Output, Dx, Dy, Bx, By, dimX, dimY, lambda); + + /* update for Bregman variables */ + updBxBy2D(Output, Dx, Dy, Bx, By, dimX, dimY); + + /* check early stopping criteria if epsilon not equal zero */ + if (epsil != 0) { + re = 0.0f; re1 = 0.0f; + for(j=0; j<DimTotal; j++) { + re += pow(Output[j] - Output_prev[j],2); + re1 += pow(Output[j],2); + } + re = sqrt(re)/sqrt(re1); + if (re < epsil) count++; + if (count > 4) break; + } + /*printf("%f %i %i \n", re, ll, count); */ + } + if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll); + free(Output_prev); free(Dx); free(Dy); free(Bx); free(By); + } + else { + /* 3D case */ + float *Output_prev=NULL, *Dx=NULL, *Dy=NULL, *Dz=NULL, *Bx=NULL, *By=NULL, *Bz=NULL; + DimTotal = dimX*dimY*dimZ; + + Output_prev = calloc(DimTotal, sizeof(float)); + Dx = calloc(DimTotal, sizeof(float)); + Dy = calloc(DimTotal, sizeof(float)); + Dz = calloc(DimTotal, sizeof(float)); + Bx = calloc(DimTotal, sizeof(float)); + By = calloc(DimTotal, sizeof(float)); + Bz = calloc(DimTotal, sizeof(float)); + + copyIm(Input, Output, dimX, dimY, dimZ); /*initialize */ + + /* begin outer SB iterations */ + for(ll=0; ll<iter; ll++) { + + /* storing old estimate */ + copyIm(Output, Output_prev, dimX, dimY, dimZ); + + /* perform two GS iterations (normally 2 is enough for the convergence) */ + gauss_seidel3D(Output, Input, Output_prev, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda, mu); + copyIm(Output, Output_prev, dimX, dimY, dimZ); + /*GS iteration */ + gauss_seidel3D(Output, Input, Output_prev, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda, mu); + + /* TV-related step */ + if (methodTV == 1) updDxDyDz_shrinkAniso3D(Output, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda); + else updDxDyDz_shrinkIso3D(Output, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda); + + /* update for Bregman variables */ + updBxByBz3D(Output, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ); + + /* check early stopping criteria if epsilon not equal zero */ + if (epsil != 0) { + re = 0.0f; re1 = 0.0f; + for(j=0; j<DimTotal; j++) { + re += pow(Output[j] - Output_prev[j],2); + re1 += pow(Output[j],2); + } + re = sqrt(re)/sqrt(re1); + if (re < epsil) count++; + if (count > 4) break; + } + /*printf("%f %i %i \n", re, ll, count); */ + } + if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll); + free(Output_prev); free(Dx); free(Dy); free(Dz); free(Bx); free(By); free(Bz); + } + return *Output; +} + +/********************************************************************/ +/***************************2D Functions*****************************/ +/********************************************************************/ +float gauss_seidel2D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu) +{ + float sum, normConst; + int i,j,i1,i2,j1,j2,index; + normConst = 1.0f/(mu + 4.0f*lambda); + +#pragma omp parallel for shared(U) private(index,i,j,i1,i2,j1,j2,sum) + for(i=0; i<dimX; i++) { + /* symmetric boundary conditions (Neuman) */ + i1 = i+1; if (i1 == dimX) i1 = i-1; + i2 = i-1; if (i2 < 0) i2 = i+1; + for(j=0; j<dimY; j++) { + /* symmetric boundary conditions (Neuman) */ + j1 = j+1; if (j1 == dimY) j1 = j-1; + j2 = j-1; if (j2 < 0) j2 = j+1; + index = j*dimX+i; + + sum = Dx[j*dimX+i2] - Dx[index] + Dy[j2*dimX+i] - Dy[index] - Bx[j*dimX+i2] + Bx[index] - By[j2*dimX+i] + By[index]; + sum += U_prev[j*dimX+i1] + U_prev[j*dimX+i2] + U_prev[j1*dimX+i] + U_prev[j2*dimX+i]; + sum *= lambda; + sum += mu*A[index]; + U[index] = normConst*sum; + }} + return *U; +} + +float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda) +{ + int i,j,i1,j1,index; + float val1, val11, val2, val22, denom_lam; + denom_lam = 1.0f/lambda; +#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,val1,val11,val2,val22) + for(i=0; i<dimX; i++) { + for(j=0; j<dimY; j++) { + /* symmetric boundary conditions (Neuman) */ + i1 = i+1; if (i1 == dimX) i1 = i-1; + j1 = j+1; if (j1 == dimY) j1 = j-1; + index = j*dimX+i; + + val1 = (U[j*dimX+i1] - U[index]) + Bx[index]; + val2 = (U[j1*dimX+i] - U[index]) + By[index]; + + val11 = fabs(val1) - denom_lam; if (val11 < 0) val11 = 0; + val22 = fabs(val2) - denom_lam; if (val22 < 0) val22 = 0; + + if (val1 !=0) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0; + if (val2 !=0) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0; + + }} + return 1; +} +float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda) +{ + int i,j,i1,j1,index; + float val1, val11, val2, denom, denom_lam; + denom_lam = 1.0f/lambda; + +#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,val1,val11,val2,denom) + for(i=0; i<dimX; i++) { + for(j=0; j<dimY; j++) { + /* symmetric boundary conditions (Neuman) */ + i1 = i+1; if (i1 == dimX) i1 = i-1; + j1 = j+1; if (j1 == dimY) j1 = j-1; + index = j*dimX+i; + + val1 = (U[j*dimX+i1] - U[index]) + Bx[index]; + val2 = (U[j1*dimX+i] - U[index]) + By[index]; + + denom = sqrt(val1*val1 + val2*val2); + + val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f; + + if (denom != 0.0f) { + Dx[index] = val11*(val1/denom); + Dy[index] = val11*(val2/denom); + } + else { + Dx[index] = 0; + Dy[index] = 0; + } + }} + return 1; +} +float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY) +{ + int i,j,i1,j1,index; +#pragma omp parallel for shared(U) private(index,i,j,i1,j1) + for(i=0; i<dimX; i++) { + for(j=0; j<dimY; j++) { + /* symmetric boundary conditions (Neuman) */ + i1 = i+1; if (i1 == dimX) i1 = i-1; + j1 = j+1; if (j1 == dimY) j1 = j-1; + index = j*dimX+i; + + Bx[index] += (U[j*dimX+i1] - U[index]) - Dx[index]; + By[index] += (U[j1*dimX+i] - U[index]) - Dy[index]; + }} + return 1; +} + +/********************************************************************/ +/***************************3D Functions*****************************/ +/********************************************************************/ +/*****************************************************************/ +float gauss_seidel3D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu) +{ + float normConst, d_val, b_val, sum; + int i,j,i1,i2,j1,j2,k,k1,k2,index; + normConst = 1.0f/(mu + 6.0f*lambda); +#pragma omp parallel for shared(U) private(index,i,j,i1,i2,j1,j2,k,k1,k2,d_val,b_val,sum) + for(i=0; i<dimX; i++) { + for(j=0; j<dimY; j++) { + for(k=0; k<dimZ; k++) { + /* symmetric boundary conditions (Neuman) */ + i1 = i+1; if (i1 == dimX) i1 = i-1; + i2 = i-1; if (i2 < 0) i2 = i+1; + j1 = j+1; if (j1 == dimY) j1 = j-1; + j2 = j-1; if (j2 < 0) j2 = j+1; + k1 = k+1; if (k1 == dimZ) k1 = k-1; + k2 = k-1; if (k2 < 0) k2 = k+1; + index = (dimX*dimY)*k + j*dimX+i; + + d_val = Dx[(dimX*dimY)*k + j*dimX+i2] - Dx[index] + Dy[(dimX*dimY)*k + j2*dimX+i] - Dy[index] + Dz[(dimX*dimY)*k2 + j*dimX+i] - Dz[index]; + b_val = -Bx[(dimX*dimY)*k + j*dimX+i2] + Bx[index] - By[(dimX*dimY)*k + j2*dimX+i] + By[index] - Bz[(dimX*dimY)*k2 + j*dimX+i] + Bz[index]; + sum = d_val + b_val; + sum += U_prev[(dimX*dimY)*k + j*dimX+i1] + U_prev[(dimX*dimY)*k + j*dimX+i2] + U_prev[(dimX*dimY)*k + j1*dimX+i] + U_prev[(dimX*dimY)*k + j2*dimX+i] + U_prev[(dimX*dimY)*k1 + j*dimX+i] + U_prev[(dimX*dimY)*k2 + j*dimX+i]; + sum *= lambda; + sum += mu*A[index]; + U[index] = normConst*sum; + }}} + return *U; +} + +float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda) +{ + int i,j,i1,j1,k,k1,index; + float val1, val11, val2, val22, val3, val33, denom_lam; + denom_lam = 1.0f/lambda; +#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,k,k1,val1,val11,val2,val22,val3,val33) + for(i=0; i<dimX; i++) { + for(j=0; j<dimY; j++) { + for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; + /* symmetric boundary conditions (Neuman) */ + i1 = i+1; if (i1 == dimX) i1 = i-1; + j1 = j+1; if (j1 == dimY) j1 = j-1; + k1 = k+1; if (k1 == dimZ) k1 = k-1; + + val1 = (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) + Bx[index]; + val2 = (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) + By[index]; + val3 = (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) + Bz[index]; + + val11 = fabs(val1) - denom_lam; if (val11 < 0.0f) val11 = 0.0f; + val22 = fabs(val2) - denom_lam; if (val22 < 0.0f) val22 = 0.0f; + val33 = fabs(val3) - denom_lam; if (val33 < 0.0f) val33 = 0.0f; + + if (val1 !=0.0f) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0.0f; + if (val2 !=0.0f) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0.0f; + if (val3 !=0.0f) Dz[index] = (val3/fabs(val3))*val33; else Dz[index] = 0.0f; + + }}} + return 1; +} +float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda) +{ + int i,j,i1,j1,k,k1,index; + float val1, val11, val2, val3, denom, denom_lam; + denom_lam = 1.0f/lambda; +#pragma omp parallel for shared(U,denom_lam) private(index,denom,i,j,i1,j1,k,k1,val1,val11,val2,val3) + for(i=0; i<dimX; i++) { + for(j=0; j<dimY; j++) { + for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; + /* symmetric boundary conditions (Neuman) */ + i1 = i+1; if (i1 == dimX) i1 = i-1; + j1 = j+1; if (j1 == dimY) j1 = j-1; + k1 = k+1; if (k1 == dimZ) k1 = k-1; + + val1 = (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) + Bx[index]; + val2 = (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) + By[index]; + val3 = (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) + Bz[index]; + + denom = sqrt(val1*val1 + val2*val2 + val3*val3); + + val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f; + + if (denom != 0.0f) { + Dx[index] = val11*(val1/denom); + Dy[index] = val11*(val2/denom); + Dz[index] = val11*(val3/denom); + } + else { + Dx[index] = 0; + Dy[index] = 0; + Dz[index] = 0; + } + }}} + return 1; +} +float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ) +{ + int i,j,k,i1,j1,k1,index; +#pragma omp parallel for shared(U) private(index,i,j,k,i1,j1,k1) + for(i=0; i<dimX; i++) { + for(j=0; j<dimY; j++) { + for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; + /* symmetric boundary conditions (Neuman) */ + i1 = i+1; if (i1 == dimX) i1 = i-1; + j1 = j+1; if (j1 == dimY) j1 = j-1; + k1 = k+1; if (k1 == dimZ) k1 = k-1; + + Bx[index] += (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) - Dx[index]; + By[index] += (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) - Dy[index]; + Bz[index] += (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) - Dz[index]; + }}} + return 1; +} diff --git a/Core/regularisers_CPU/SB_TV_core.h b/Core/regularisers_CPU/SB_TV_core.h new file mode 100755 index 0000000..791d951 --- /dev/null +++ b/Core/regularisers_CPU/SB_TV_core.h @@ -0,0 +1,37 @@ +#include <math.h> +#include <stdlib.h> +#include <memory.h> +#include <stdio.h> +#include "omp.h" +#include "utils.h" + +/* +This work is part of the Core Imaging Library developed by +Visual Analytics and Imaging System Group of the Science Technology +Facilities Council, STFC + +Copyright 2017 Daniil Kazantsev +Copyright 2017 Srikanth Nagella, Edoardo Pasca + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +float SB_TV_CPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ); + +float gauss_seidel2D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu); +float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda); +float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda); +float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY); + +float gauss_seidel3D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu); +float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda); +float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda); +float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ); diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/Core/regularisers_GPU/TV_FGP_GPU_core.cu index 3fbbcde..c0ae890 100755 --- a/Core/regularisers_GPU/TV_FGP_GPU_core.cu +++ b/Core/regularisers_GPU/TV_FGP_GPU_core.cu @@ -173,28 +173,6 @@ __global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total) if (Output[index] < 0.0f) Output[index] = 0.0f; } } -__global__ void copy_kernel2D(float *Input, float* Output, int N, int M, int num_total) -{ - int xIndex = blockDim.x * blockIdx.x + threadIdx.x; - int yIndex = blockDim.y * blockIdx.y + threadIdx.y; - - int index = xIndex + N*yIndex; - - if (index < num_total) { - Output[index] = Input[index]; - } -} -__global__ void ResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total) -{ - int xIndex = blockDim.x * blockIdx.x + threadIdx.x; - int yIndex = blockDim.y * blockIdx.y + threadIdx.y; - - int index = xIndex + N*yIndex; - - if (index < num_total) { - Output[index] = Input1[index] - Input2[index]; - } -} /************************************************/ /*****************3D modules*********************/ /************************************************/ @@ -294,8 +272,6 @@ __global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N, } return; } - - __global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize) { //calculate each thread global index @@ -325,8 +301,19 @@ __global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_tota if (Output[index] < 0.0f) Output[index] = 0.0f; } } +__global__ void FGPcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + Output[index] = Input[index]; + } +} -__global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total) +__global__ void FGPcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total) { int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; @@ -338,6 +325,32 @@ __global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, Output[index] = Input[index]; } } + +__global__ void FGPResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + Output[index] = Input1[index] - Input2[index]; + } +} + +__global__ void FGPResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if (index < num_total) { + Output[index] = Input1[index] - Input2[index]; + } +} + /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ ////////////MAIN HOST FUNCTION /////////////// @@ -418,7 +431,7 @@ extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, in if (epsil != 0.0f) { /* calculate norm - stopping rules using the Thrust library */ - ResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize); + FGPResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); @@ -431,16 +444,16 @@ extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, in if (re < epsil) count++; if (count > 4) break; - copy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize); + FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); } - copy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize); + FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); - copy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize); + FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); @@ -526,15 +539,15 @@ extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, in checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); - copy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize); + FGPcopy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); - copy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize); + FGPcopy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); - copy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize); + FGPcopy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.cu b/Core/regularisers_GPU/TV_SB_GPU_core.cu new file mode 100755 index 0000000..0bc466f --- /dev/null +++ b/Core/regularisers_GPU/TV_SB_GPU_core.cu @@ -0,0 +1,562 @@ + /* +This work is part of the Core Imaging Library developed by +Visual Analytics and Imaging System Group of the Science Technology +Facilities Council, STFC + +Copyright 2017 Daniil Kazantsev +Copyright 2017 Srikanth Nagella, Edoardo Pasca + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "TV_SB_GPU_core.h" +#include <thrust/device_vector.h> +#include <thrust/transform_reduce.h> + +/* CUDA implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1] +* +* Input Parameters: +* 1. Noisy image/volume +* 2. lambda - regularisation parameter +* 3. Number of iterations [OPTIONAL parameter] +* 4. eplsilon - tolerance constant [OPTIONAL parameter] +* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] +* 6. nonneg: 'nonnegativity (0 is OFF by default) [OPTIONAL parameter] +* 7. print information: 0 (off) or 1 (on) [OPTIONAL parameter] +* +* Output: +* 1. Filtered/regularized image +* +* This function is based on the Matlab's code and paper by +* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343. +*/ + +// This will output the proper CUDA error strings in the event that a CUDA host call returns an error +#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) + +inline void __checkCudaErrors(cudaError err, const char *file, const int line) +{ + if (cudaSuccess != err) + { + fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", + file, line, (int)err, cudaGetErrorString(err)); + exit(EXIT_FAILURE); + } +} + +#define BLKXSIZE2D 16 +#define BLKYSIZE2D 16 + +#define BLKXSIZE 8 +#define BLKYSIZE 8 +#define BLKZSIZE 8 + +#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) +struct square { __host__ __device__ float operator()(float x) { return x * x; } }; + +/************************************************/ +/*****************2D modules*********************/ +/************************************************/ +__global__ void gauss_seidel2D_kernel(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, float lambda, float mu, float normConst, int N, int M, int ImSize) +{ + + float sum; + int i1,i2,j1,j2; + + //calculate each thread global index + const int i=blockIdx.x*blockDim.x+threadIdx.x; + const int j=blockIdx.y*blockDim.y+threadIdx.y; + + int index = j*N+i; + + if ((i < N) && (j < M)) { + i1 = i+1; if (i1 == N) i1 = i-1; + i2 = i-1; if (i2 < 0) i2 = i+1; + j1 = j+1; if (j1 == M) j1 = j-1; + j2 = j-1; if (j2 < 0) j2 = j+1; + + sum = Dx[j*N+i2] - Dx[index] + Dy[j2*N+i] - Dy[index] - Bx[j*N+i2] + Bx[index] - By[j2*N+i] + By[index]; + sum += U_prev[j*N+i1] + U_prev[j*N+i2] + U_prev[j1*N+i] + U_prev[j2*N+i]; + sum *= lambda; + sum += mu*A[index]; + U[index] = normConst*sum; //Write final result to global memory + } + return; +} +__global__ void updDxDy_shrinkAniso2D_kernel(float *U, float *Dx, float *Dy, float *Bx, float *By, float lambda, int N, int M, int ImSize) +{ + + int i1,j1; + float val1, val11, val2, val22, denom_lam; + denom_lam = 1.0f/lambda; + + //calculate each thread global index + const int i=blockIdx.x*blockDim.x+threadIdx.x; + const int j=blockIdx.y*blockDim.y+threadIdx.y; + + int index = j*N+i; + + if ((i < N) && (j < M)) { + i1 = i+1; if (i1 == N) i1 = i-1; + j1 = j+1; if (j1 == M) j1 = j-1; + + val1 = (U[j*N+i1] - U[index]) + Bx[index]; + val2 = (U[j1*N+i] - U[index]) + By[index]; + + val11 = abs(val1) - denom_lam; if (val11 < 0) val11 = 0; + val22 = abs(val2) - denom_lam; if (val22 < 0) val22 = 0; + + if (val1 !=0) Dx[index] = (val1/abs(val1))*val11; else Dx[index] = 0; + if (val2 !=0) Dy[index] = (val2/abs(val2))*val22; else Dy[index] = 0; + } + return; +} + +__global__ void updDxDy_shrinkIso2D_kernel(float *U, float *Dx, float *Dy, float *Bx, float *By, float lambda, int N, int M, int ImSize) +{ + + int i1,j1; + float val1, val11, val2, denom_lam, denom; + denom_lam = 1.0f/lambda; + + //calculate each thread global index + const int i=blockIdx.x*blockDim.x+threadIdx.x; + const int j=blockIdx.y*blockDim.y+threadIdx.y; + + int index = j*N+i; + + if ((i < N) && (j < M)) { + i1 = i+1; if (i1 == N) i1 = i-1; + j1 = j+1; if (j1 == M) j1 = j-1; + + val1 = (U[j*N+i1] - U[index]) + Bx[index]; + val2 = (U[j1*N+i] - U[index]) + By[index]; + + denom = sqrt(val1*val1 + val2*val2); + + val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f; + + if (denom != 0.0f) { + Dx[index] = val11*(val1/denom); + Dy[index] = val11*(val2/denom); + } + else { + Dx[index] = 0; + Dy[index] = 0; + } + } + return; +} + +__global__ void updBxBy2D_kernel(float *U, float *Dx, float *Dy, float *Bx, float *By, int N, int M, int ImSize) +{ + int i1,j1; + + //calculate each thread global index + const int i=blockIdx.x*blockDim.x+threadIdx.x; + const int j=blockIdx.y*blockDim.y+threadIdx.y; + + int index = j*N+i; + + if ((i < N) && (j < M)) { + /* symmetric boundary conditions (Neuman) */ + i1 = i+1; if (i1 == N) i1 = i-1; + j1 = j+1; if (j1 == M) j1 = j-1; + + Bx[index] += (U[j*N+i1] - U[index]) - Dx[index]; + By[index] += (U[j1*N+i] - U[index]) - Dy[index]; + } + return; +} + + +/************************************************/ +/*****************3D modules*********************/ +/************************************************/ +__global__ void gauss_seidel3D_kernel(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, float lambda, float mu, float normConst, int N, int M, int Z, int ImSize) +{ + + float sum,d_val,b_val; + int i1,i2,j1,j2,k1,k2; + + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + i1 = i+1; if (i1 == N) i1 = i-1; + i2 = i-1; if (i2 < 0) i2 = i+1; + j1 = j+1; if (j1 == M) j1 = j-1; + j2 = j-1; if (j2 < 0) j2 = j+1; + k1 = k+1; if (k1 == Z) k1 = k-1; + k2 = k-1; if (k2 < 0) k2 = k+1; + + d_val = Dx[(N*M)*k + j*N+i2] - Dx[index] + Dy[(N*M)*k + j2*N+i] - Dy[index] + Dz[(N*M)*k2 + j*N+i] - Dz[index]; + b_val = -Bx[(N*M)*k + j*N+i2] + Bx[index] - By[(N*M)*k + j2*N+i] + By[index] - Bz[(N*M)*k2 + j*N+i] + Bz[index]; + sum = d_val + b_val; + sum += U_prev[(N*M)*k + j*N+i1] + U_prev[(N*M)*k + j*N+i2] + U_prev[(N*M)*k + j1*N+i] + U_prev[(N*M)*k + j2*N+i] + U_prev[(N*M)*k1 + j*N+i] + U_prev[(N*M)*k2 + j*N+i]; + sum *= lambda; + sum += mu*A[index]; + U[index] = normConst*sum; + } + return; +} +__global__ void updDxDy_shrinkAniso3D_kernel(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, float lambda, int N, int M, int Z, int ImSize) +{ + + int i1,j1,k1; + float val1, val11, val2, val3, val22, val33, denom_lam; + denom_lam = 1.0f/lambda; + + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + i1 = i+1; if (i1 == N) i1 = i-1; + j1 = j+1; if (j1 == M) j1 = j-1; + k1 = k+1; if (k1 == Z) k1 = k-1; + + val1 = (U[(N*M)*k + i1 + N*j] - U[index]) + Bx[index]; + val2 = (U[(N*M)*k + i + N*j1] - U[index]) + By[index]; + val3 = (U[(N*M)*k1 + i + N*j] - U[index]) + Bz[index]; + + val11 = abs(val1) - denom_lam; if (val11 < 0.0f) val11 = 0.0f; + val22 = abs(val2) - denom_lam; if (val22 < 0.0f) val22 = 0.0f; + val33 = abs(val3) - denom_lam; if (val33 < 0.0f) val33 = 0.0f; + + if (val1 !=0.0f) Dx[index] = (val1/abs(val1))*val11; else Dx[index] = 0.0f; + if (val2 !=0.0f) Dy[index] = (val2/abs(val2))*val22; else Dy[index] = 0.0f; + if (val3 !=0.0f) Dz[index] = (val3/abs(val3))*val33; else Dz[index] = 0.0f; + } + return; +} + +__global__ void updDxDy_shrinkIso3D_kernel(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, float lambda, int N, int M, int Z, int ImSize) +{ + + int i1,j1,k1; + float val1, val11, val2, val3, denom_lam, denom; + denom_lam = 1.0f/lambda; + + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + i1 = i+1; if (i1 == N) i1 = i-1; + j1 = j+1; if (j1 == M) j1 = j-1; + k1 = k+1; if (k1 == Z) k1 = k-1; + + val1 = (U[(N*M)*k + i1 + N*j] - U[index]) + Bx[index]; + val2 = (U[(N*M)*k + i + N*j1] - U[index]) + By[index]; + val3 = (U[(N*M)*k1 + i + N*j] - U[index]) + Bz[index]; + + denom = sqrt(val1*val1 + val2*val2 + val3*val3); + + val11 = (denom - denom_lam); if (val11 < 0.0f) val11 = 0.0f; + + if (denom != 0.0f) { + Dx[index] = val11*(val1/denom); + Dy[index] = val11*(val2/denom); + Dz[index] = val11*(val3/denom); + } + else { + Dx[index] = 0.0f; + Dy[index] = 0.0f; + Dz[index] = 0.0f; + } + } + return; +} + +__global__ void updBxBy3D_kernel(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int N, int M, int Z, int ImSize) +{ + int i1,j1,k1; + + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + /* symmetric boundary conditions (Neuman) */ + i1 = i+1; if (i1 == N) i1 = i-1; + j1 = j+1; if (j1 == M) j1 = j-1; + k1 = k+1; if (k1 == Z) k1 = k-1; + + Bx[index] += (U[(N*M)*k + i1 + N*j] - U[index]) - Dx[index]; + By[index] += (U[(N*M)*k + i + N*j1] - U[index]) - Dy[index]; + Bz[index] += (U[(N*M)*k1 + i + N*j] - U[index]) - Dz[index]; + } + return; +} + +__global__ void SBcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + Output[index] = Input[index]; + } +} + +__global__ void SBcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if (index < num_total) { + Output[index] = Input[index]; + } +} + +__global__ void SBResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + Output[index] = Input1[index] - Input2[index]; + } +} + +__global__ void SBResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if (index < num_total) { + Output[index] = Input1[index] - Input2[index]; + } +} + +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ +/********************* MAIN HOST FUNCTION ******************/ +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ +extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ) +{ + int deviceCount = -1; // number of devices + cudaGetDeviceCount(&deviceCount); + if (deviceCount == 0) { + fprintf(stderr, "No CUDA devices found\n"); + return; + } + + int ll, DimTotal; + float re, lambda, normConst; + int count = 0; + mu = 1.0f/mu; + lambda = 2.0f*mu; + + if (dimZ <= 1) { + /*2D verson*/ + DimTotal = dimX*dimY; + normConst = 1.0f/(mu + 4.0f*lambda); + float *d_input, *d_update, *d_res, *d_update_prev=NULL, *Dx=NULL, *Dy=NULL, *Bx=NULL, *By=NULL; + + dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); + dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D)); + + /*allocate space for images on device*/ + checkCudaErrors( cudaMalloc((void**)&d_input,DimTotal*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&d_update,DimTotal*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&d_update_prev,DimTotal*sizeof(float)) ); + if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_res,DimTotal*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&Dx,DimTotal*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&Dy,DimTotal*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&Bx,DimTotal*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&By,DimTotal*sizeof(float)) ); + + checkCudaErrors( cudaMemcpy(d_input,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(d_update,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice)); + cudaMemset(Dx, 0, DimTotal*sizeof(float)); + cudaMemset(Dy, 0, DimTotal*sizeof(float)); + cudaMemset(Bx, 0, DimTotal*sizeof(float)); + cudaMemset(By, 0, DimTotal*sizeof(float)); + + /********************** Run CUDA 2D kernels here ********************/ + /* The main kernel */ + for (ll = 0; ll < iter; ll++) { + + /* storing old value */ + SBcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, DimTotal); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* perform two GS iterations (normally 2 is enough for the convergence) */ + gauss_seidel2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Bx, By, lambda, mu, normConst, dimX, dimY, DimTotal); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + SBcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, DimTotal); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + /* 2nd GS iteration */ + gauss_seidel2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Bx, By, lambda, mu, normConst, dimX, dimY, DimTotal); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* TV-related step */ + if (methodTV == 1) updDxDy_shrinkAniso2D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Bx, By, lambda, dimX, dimY, DimTotal); + else updDxDy_shrinkIso2D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Bx, By, lambda, dimX, dimY, DimTotal); + + /* update for Bregman variables */ + updBxBy2D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Bx, By, dimX, dimY, DimTotal); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (epsil != 0.0f) { + /* calculate norm - stopping rules using the Thrust library */ + SBResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, d_res, dimX, dimY, DimTotal); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + thrust::device_vector<float> d_vec(d_res, d_res + DimTotal); + float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>())); + thrust::device_vector<float> d_vec2(d_update, d_update + DimTotal); + float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>())); + + re = (reduction/reduction2); + if (re < epsil) count++; + if (count > 4) break; + } + + } + if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll); + /***************************************************************/ + //copy result matrix from device to host memory + cudaMemcpy(Output,d_update,DimTotal*sizeof(float),cudaMemcpyDeviceToHost); + + cudaFree(d_input); + cudaFree(d_update); + cudaFree(d_update_prev); + if (epsil != 0.0f) cudaFree(d_res); + cudaFree(Dx); + cudaFree(Dy); + cudaFree(Bx); + cudaFree(By); + } + else { + /*3D verson*/ + DimTotal = dimX*dimY*dimZ; + normConst = 1.0f/(mu + 6.0f*lambda); + float *d_input, *d_update, *d_res, *d_update_prev=NULL, *Dx=NULL, *Dy=NULL, *Dz=NULL, *Bx=NULL, *By=NULL, *Bz=NULL; + + dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); + dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE)); + + /*allocate space for images on device*/ + checkCudaErrors( cudaMalloc((void**)&d_input,DimTotal*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&d_update,DimTotal*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&d_update_prev,DimTotal*sizeof(float)) ); + if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_res,DimTotal*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&Dx,DimTotal*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&Dy,DimTotal*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&Dz,DimTotal*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&Bx,DimTotal*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&By,DimTotal*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&Bz,DimTotal*sizeof(float)) ); + + checkCudaErrors( cudaMemcpy(d_input,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(d_update,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice)); + cudaMemset(Dx, 0, DimTotal*sizeof(float)); + cudaMemset(Dy, 0, DimTotal*sizeof(float)); + cudaMemset(Dz, 0, DimTotal*sizeof(float)); + cudaMemset(Bx, 0, DimTotal*sizeof(float)); + cudaMemset(By, 0, DimTotal*sizeof(float)); + cudaMemset(Bz, 0, DimTotal*sizeof(float)); + + /********************** Run CUDA 3D kernels here ********************/ + /* The main kernel */ + for (ll = 0; ll < iter; ll++) { + + /* storing old value */ + SBcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, DimTotal); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* perform two GS iterations (normally 2 is enough for the convergence) */ + gauss_seidel3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Dz, Bx, By, Bz, lambda, mu, normConst, dimX, dimY, dimZ, DimTotal); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + SBcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, DimTotal); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + /* 2nd GS iteration */ + gauss_seidel3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Dz, Bx, By, Bz, lambda, mu, normConst, dimX, dimY, dimZ, DimTotal); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* TV-related step */ + if (methodTV == 1) updDxDy_shrinkAniso3D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Dz, Bx, By, Bz, lambda, dimX, dimY, dimZ, DimTotal); + else updDxDy_shrinkIso3D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Dz, Bx, By, Bz, lambda, dimX, dimY, dimZ, DimTotal); + + /* update for Bregman variables */ + updBxBy3D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, DimTotal); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (epsil != 0.0f) { + /* calculate norm - stopping rules using the Thrust library */ + SBResidCalc3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, d_res, dimX, dimY, dimZ, DimTotal); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + thrust::device_vector<float> d_vec(d_res, d_res + DimTotal); + float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>())); + thrust::device_vector<float> d_vec2(d_update, d_update + DimTotal); + float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>())); + + re = (reduction/reduction2); + if (re < epsil) count++; + if (count > 4) break; + } + } + if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll); + /***************************************************************/ + //copy result matrix from device to host memory + cudaMemcpy(Output,d_update,DimTotal*sizeof(float),cudaMemcpyDeviceToHost); + + cudaFree(d_input); + cudaFree(d_update); + cudaFree(d_update_prev); + if (epsil != 0.0f) cudaFree(d_res); + cudaFree(Dx); + cudaFree(Dy); + cudaFree(Dz); + cudaFree(Bx); + cudaFree(By); + cudaFree(Bz); + } + cudaDeviceReset(); +} diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.h b/Core/regularisers_GPU/TV_SB_GPU_core.h new file mode 100755 index 0000000..bdc9219 --- /dev/null +++ b/Core/regularisers_GPU/TV_SB_GPU_core.h @@ -0,0 +1,10 @@ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> + +#ifndef _SB_TV_GPU_ +#define _SB_TV_GPU_ + +extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ); + +#endif diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu index 04047a5..d4699f9 100644 --- a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu +++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu @@ -240,6 +240,20 @@ __global__ void dTVcopy_kernel2D(float *Input, float* Output, int N, int M, int Output[index] = Input[index]; } } + +__global__ void dTVcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if (index < num_total) { + Output[index] = Input[index]; + } +} + __global__ void dTVResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total) { int xIndex = blockDim.x * blockIdx.x + threadIdx.x; @@ -250,7 +264,21 @@ __global__ void dTVResidCalc2D_kernel(float *Input1, float *Input2, float* Outpu if (index < num_total) { Output[index] = Input1[index] - Input2[index]; } -} +} + +__global__ void dTVResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if (index < num_total) { + Output[index] = Input1[index] - Input2[index]; + } +} + /************************************************/ /*****************3D modules*********************/ /************************************************/ @@ -437,32 +465,6 @@ __global__ void dTVnonneg3D_kernel(float* Output, int N, int M, int Z, int num_t if (Output[index] < 0.0f) Output[index] = 0.0f; } } - -__global__ void dTVcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total) -{ - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (N*M)*k + i + N*j; - - if (index < num_total) { - Output[index] = Input[index]; - } -} - -__global__ void dTVResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total) -{ - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (N*M)*k + i + N*j; - - if (index < num_total) { - Output[index] = Input1[index] - Input2[index]; - } -} /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ ////////////MAIN HOST FUNCTION /////////////// @@ -14,11 +14,12 @@ can also be used as image denoising iterative filters. The core modules are writ ## Package modules (regularisers): ### Single-channel -1. Rudin-Osher-Fatemi (ROF) Total Variation (explicit PDE minimisation scheme) [2D/3D GPU/CPU]; (Ref. 1) -2. Fast-Gradient-Projection (FGP) Total Variation [2D/3D GPU/CPU]; (Ref. 2) +1. Rudin-Osher-Fatemi (ROF) Total Variation (explicit PDE minimisation scheme) [2D/3D CPU/GPU]; (Ref. 1) +2. Fast-Gradient-Projection (FGP) Total Variation [2D/3D CPU/GPU]; (Ref. 2) +3. Split-Bregman (SB) Total Variation [2D/3D CPU/GPU]; (Ref. 4) ### Multi-channel -1. Fast-Gradient-Projection (FGP) Directional Total Variation [2D/3D GPU/CPU]; (Ref. 4,2) +1. Fast-Gradient-Projection (FGP) Directional Total Variation [2D/3D CPU/GPU]; (Ref. 3,2) ## Installation: @@ -43,12 +44,11 @@ can also be used as image denoising iterative filters. The core modules are writ ### References: 1. Rudin, L.I., Osher, S. and Fatemi, E., 1992. Nonlinear total variation based noise removal algorithms. Physica D: nonlinear phenomena, 60(1-4), pp.259-268. 2. Beck, A. and Teboulle, M., 2009. Fast gradient-based algorithms for constrained total variation image denoising and deblurring problems. IEEE Transactions on Image Processing, 18(11), pp.2419-2434. -3. Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590. -4. Ehrhardt, M.J. and Betcke, M.M., 2016. Multicontrast MRI reconstruction with structure-guided total variation. SIAM Journal on Imaging Sciences, 9(3), pp.1084-1106. +3. Ehrhardt, M.J. and Betcke, M.M., 2016. Multicontrast MRI reconstruction with structure-guided total variation. SIAM Journal on Imaging Sciences, 9(3), pp.1084-1106. +4. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343. ### License: [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) ### Acknowledgments: CCPi-RGL software is a product of the [CCPi](https://www.ccpi.ac.uk/) group and STFC SCD software developers. Any relevant questions/comments can be e-mailed to Daniil Kazantsev at dkazanc@hotmail.com - diff --git a/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m b/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m index dc49d9c..fb55097 100644 --- a/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m +++ b/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m @@ -14,35 +14,47 @@ end vol3D(vol3D < 0) = 0; figure; imshow(vol3D(:,:,15), [0 1]); title('Noisy image'); + +lambda_reg = 0.03; % regularsation parameter for all methods %% fprintf('Denoise a volume using the ROF-TV model (CPU) \n'); -lambda_rof = 0.03; % regularisation parameter tau_rof = 0.0025; % time-marching constant iter_rof = 300; % number of ROF iterations -tic; u_rof = ROF_TV(single(vol3D), lambda_rof, iter_rof, tau_rof); toc; +tic; u_rof = ROF_TV(single(vol3D), lambda_reg, iter_rof, tau_rof); toc; figure; imshow(u_rof(:,:,15), [0 1]); title('ROF-TV denoised volume (CPU)'); %% % fprintf('Denoise a volume using the ROF-TV model (GPU) \n'); -% lambda_rof = 0.03; % regularisation parameter % tau_rof = 0.0025; % time-marching constant % iter_rof = 300; % number of ROF iterations -% tic; u_rofG = ROF_TV_GPU(single(vol3D), lambda_rof, iter_rof, tau_rof); toc; +% tic; u_rofG = ROF_TV_GPU(single(vol3D), lambda_reg, iter_rof, tau_rof); toc; % figure; imshow(u_rofG(:,:,15), [0 1]); title('ROF-TV denoised volume (GPU)'); %% fprintf('Denoise a volume using the FGP-TV model (CPU) \n'); -lambda_fgp = 0.03; % regularisation parameter iter_fgp = 300; % number of FGP iterations epsil_tol = 1.0e-05; % tolerance -tic; u_fgp = FGP_TV(single(vol3D), lambda_fgp, iter_fgp, epsil_tol); toc; +tic; u_fgp = FGP_TV(single(vol3D), lambda_reg, iter_fgp, epsil_tol); toc; figure; imshow(u_fgp(:,:,15), [0 1]); title('FGP-TV denoised volume (CPU)'); %% % fprintf('Denoise a volume using the FGP-TV model (GPU) \n'); -% lambda_fgp = 0.03; % regularisation parameter % iter_fgp = 300; % number of FGP iterations % epsil_tol = 1.0e-05; % tolerance -% tic; u_fgpG = FGP_TV_GPU(single(vol3D), lambda_fgp, iter_fgp, epsil_tol); toc; +% tic; u_fgpG = FGP_TV_GPU(single(vol3D), lambda_reg, iter_fgp, epsil_tol); toc; % figure; imshow(u_fgpG(:,:,15), [0 1]); title('FGP-TV denoised volume (GPU)'); %% +fprintf('Denoise a volume using the SB-TV model (CPU) \n'); +iter_sb = 150; % number of SB iterations +epsil_tol = 1.0e-05; % tolerance +tic; u_sb = SB_TV(single(vol3D), lambda_reg, iter_sb, epsil_tol); toc; +figure; imshow(u_sb(:,:,15), [0 1]); title('SB-TV denoised volume (CPU)'); +%% +% fprintf('Denoise a volume using the SB-TV model (GPU) \n'); +% iter_sb = 150; % number of SB iterations +% epsil_tol = 1.0e-05; % tolerance +% tic; u_sbG = SB_TV_GPU(single(vol3D), lambda_reg, iter_sb, epsil_tol); toc; +% figure; imshow(u_sbG(:,:,15), [0 1]); title('SB-TV denoised volume (GPU)'); +%% + +%>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< % fprintf('Denoise a volume using the FGP-dTV model (CPU) \n'); % create another volume (reference) with slightly less amount of noise @@ -53,11 +65,10 @@ end vol3D_ref(vol3D_ref < 0) = 0; % vol3D_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV) -lambda_fgp = 0.03; % regularisation parameter iter_fgp = 300; % number of FGP iterations epsil_tol = 1.0e-05; % tolerance eta = 0.2; % Reference image gradient smoothing constant -tic; u_fgp_dtv = FGP_dTV(single(vol3D), single(vol3D_ref), lambda_fgp, iter_fgp, epsil_tol, eta); toc; +tic; u_fgp_dtv = FGP_dTV(single(vol3D), single(vol3D_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; figure; imshow(u_fgp_dtv(:,:,15), [0 1]); title('FGP-dTV denoised volume (CPU)'); %% fprintf('Denoise a volume using the FGP-dTV model (GPU) \n'); @@ -70,10 +81,9 @@ end vol3D_ref(vol3D_ref < 0) = 0; % vol3D_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV) -lambda_fgp = 0.03; % regularisation parameter iter_fgp = 300; % number of FGP iterations epsil_tol = 1.0e-05; % tolerance eta = 0.2; % Reference image gradient smoothing constant -tic; u_fgp_dtv_g = FGP_dTV_GPU(single(vol3D), single(vol3D_ref), lambda_fgp, iter_fgp, epsil_tol, eta); toc; +tic; u_fgp_dtv_g = FGP_dTV_GPU(single(vol3D), single(vol3D_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; figure; imshow(u_fgp_dtv_g(:,:,15), [0 1]); title('FGP-dTV denoised volume (GPU)'); %%
\ No newline at end of file diff --git a/Wrappers/Matlab/demos/demoMatlab_denoise.m b/Wrappers/Matlab/demos/demoMatlab_denoise.m index 145f2ff..129bedc 100644 --- a/Wrappers/Matlab/demos/demoMatlab_denoise.m +++ b/Wrappers/Matlab/demos/demoMatlab_denoise.m @@ -8,45 +8,55 @@ Im = double(imread('lena_gray_512.tif'))/255; % loading image u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0; figure; imshow(u0, [0 1]); title('Noisy image'); +lambda_reg = 0.03; % regularsation parameter for all methods %% fprintf('Denoise using the ROF-TV model (CPU) \n'); -lambda_rof = 0.03; % regularisation parameter tau_rof = 0.0025; % time-marching constant iter_rof = 2000; % number of ROF iterations -tic; u_rof = ROF_TV(single(u0), lambda_rof, iter_rof, tau_rof); toc; +tic; u_rof = ROF_TV(single(u0), lambda_reg, iter_rof, tau_rof); toc; figure; imshow(u_rof, [0 1]); title('ROF-TV denoised image (CPU)'); %% % fprintf('Denoise using the ROF-TV model (GPU) \n'); -% lambda_rof = 0.03; % regularisation parameter % tau_rof = 0.0025; % time-marching constant % iter_rof = 2000; % number of ROF iterations -% tic; u_rofG = ROF_TV_GPU(single(u0), lambda_rof, iter_rof, tau_rof); toc; +% tic; u_rofG = ROF_TV_GPU(single(u0), lambda_reg, iter_rof, tau_rof); toc; % figure; imshow(u_rofG, [0 1]); title('ROF-TV denoised image (GPU)'); %% fprintf('Denoise using the FGP-TV model (CPU) \n'); -lambda_fgp = 0.03; % regularisation parameter iter_fgp = 1000; % number of FGP iterations epsil_tol = 1.0e-06; % tolerance -tic; u_fgp = FGP_TV(single(u0), lambda_fgp, iter_fgp, epsil_tol); toc; +tic; u_fgp = FGP_TV(single(u0), lambda_reg, iter_fgp, epsil_tol); toc; figure; imshow(u_fgp, [0 1]); title('FGP-TV denoised image (CPU)'); %% % fprintf('Denoise using the FGP-TV model (GPU) \n'); -% lambda_fgp = 0.03; % regularisation parameter % iter_fgp = 1000; % number of FGP iterations % epsil_tol = 1.0e-05; % tolerance -% tic; u_fgpG = FGP_TV_GPU(single(u0), lambda_fgp, iter_fgp, epsil_tol); toc; +% tic; u_fgpG = FGP_TV_GPU(single(u0), lambda_reg, iter_fgp, epsil_tol); toc; % figure; imshow(u_fgpG, [0 1]); title('FGP-TV denoised image (GPU)'); %% +fprintf('Denoise using the SB-TV model (CPU) \n'); +iter_sb = 150; % number of SB iterations +epsil_tol = 1.0e-06; % tolerance +tic; u_sb = SB_TV(single(u0), lambda_reg, iter_sb, epsil_tol); toc; +figure; imshow(u_sb, [0 1]); title('SB-TV denoised image (CPU)'); +%% +% fprintf('Denoise using the SB-TV model (GPU) \n'); +% iter_sb = 150; % number of SB iterations +% epsil_tol = 1.0e-06; % tolerance +% tic; u_sbG = SB_TV_GPU(single(u0), lambda_reg, iter_sb, epsil_tol); toc; +% figure; imshow(u_sbG, [0 1]); title('SB-TV denoised image (GPU)'); +%% +%>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< % + fprintf('Denoise using the FGP-dTV model (CPU) \n'); % create another image (reference) with slightly less amount of noise u_ref = Im + .01*randn(size(Im)); u_ref(u_ref < 0) = 0; % u_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV) -lambda_fgp = 0.03; % regularisation parameter iter_fgp = 1000; % number of FGP iterations epsil_tol = 1.0e-06; % tolerance eta = 0.2; % Reference image gradient smoothing constant -tic; u_fgp_dtv = FGP_dTV(single(u0), single(u_ref), lambda_fgp, iter_fgp, epsil_tol, eta); toc; +tic; u_fgp_dtv = FGP_dTV(single(u0), single(u_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; figure; imshow(u_fgp_dtv, [0 1]); title('FGP-dTV denoised image (CPU)'); %% % fprintf('Denoise using the FGP-dTV model (GPU) \n'); @@ -54,10 +64,9 @@ figure; imshow(u_fgp_dtv, [0 1]); title('FGP-dTV denoised image (CPU)'); % u_ref = Im + .01*randn(size(Im)); u_ref(u_ref < 0) = 0; % % u_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV) % -% lambda_fgp = 0.03; % regularisation parameter % iter_fgp = 1000; % number of FGP iterations % epsil_tol = 1.0e-06; % tolerance % eta = 0.2; % Reference image gradient smoothing constant -% tic; u_fgp_dtvG = FGP_dTV_GPU(single(u0), single(u_ref), lambda_fgp, iter_fgp, epsil_tol, eta); toc; +% tic; u_fgp_dtvG = FGP_dTV_GPU(single(u0), single(u_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; % figure; imshow(u_fgp_dtvG, [0 1]); title('FGP-dTV denoised image (GPU)'); %% diff --git a/Wrappers/Matlab/mex_compile/compileCPU_mex.m b/Wrappers/Matlab/mex_compile/compileCPU_mex.m index 71f345a..c3c82ff 100644 --- a/Wrappers/Matlab/mex_compile/compileCPU_mex.m +++ b/Wrappers/Matlab/mex_compile/compileCPU_mex.m @@ -11,10 +11,13 @@ movefile ROF_TV.mex* ../installed/ mex FGP_TV.c FGP_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" movefile FGP_TV.mex* ../installed/ +mex SB_TV.c SB_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" +movefile SB_TV.mex* ../installed/ + mex FGP_dTV.c FGP_dTV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" movefile FGP_dTV.mex* ../installed/ -delete ROF_TV_core* FGP_TV_core* FGP_dTV_core* utils* CCPiDefines.h +delete SB_TV_core* ROF_TV_core* FGP_TV_core* FGP_dTV_core* utils* CCPiDefines.h fprintf('%s \n', 'All successfully compiled!'); diff --git a/Wrappers/Matlab/mex_compile/compileGPU_mex.m b/Wrappers/Matlab/mex_compile/compileGPU_mex.m index f58e9bc..3dbeb8a 100644 --- a/Wrappers/Matlab/mex_compile/compileGPU_mex.m +++ b/Wrappers/Matlab/mex_compile/compileGPU_mex.m @@ -1,13 +1,13 @@ % execute this mex file in Matlab once -%>>>>>>>>>>>>>>Important<<<<<<<<<<<<<<<<<<< +%>>>>>>>>>>>>>>>>>Important<<<<<<<<<<<<<<<<<<< % In order to compile CUDA modules one needs to have nvcc-compiler -% installed (see CUDA SDK) -% check it under MATLAB with !nvcc --version -% In the code bellow we provide a full path to nvcc compiler +% installed (see CUDA SDK), check it under MATLAB with !nvcc --version + +% In the code bellow we provide a full explicit path to nvcc compiler % ! paths to matlab and CUDA sdk can be different, modify accordingly ! -% tested on Ubuntu 16.04/MATLAB 2016b +% tested on Ubuntu 16.04/MATLAB 2016b/cuda7.5/gcc4.9 copyfile ../../../Core/regularisers_GPU/ regularisers_GPU/ copyfile ../../../Core/CCPiDefines.h regularisers_GPU/ @@ -23,11 +23,15 @@ movefile ROF_TV_GPU.mex* ../installed/ mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu FGP_TV_GPU.cpp TV_FGP_GPU_core.o movefile FGP_TV_GPU.mex* ../installed/ +!/usr/local/cuda/bin/nvcc -O0 -c TV_SB_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ +mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu SB_TV_GPU.cpp TV_SB_GPU_core.o +movefile SB_TV_GPU.mex* ../installed/ + !/usr/local/cuda/bin/nvcc -O0 -c dTV_FGP_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu FGP_dTV_GPU.cpp dTV_FGP_GPU_core.o movefile FGP_dTV_GPU.mex* ../installed/ -delete TV_ROF_GPU_core* TV_FGP_GPU_core* dTV_FGP_GPU_core* CCPiDefines.h +delete TV_ROF_GPU_core* TV_FGP_GPU_core* TV_SB_GPU_core* dTV_FGP_GPU_core* CCPiDefines.h fprintf('%s \n', 'All successfully compiled!'); cd ../../ diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/SB_TV.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/SB_TV.c new file mode 100644 index 0000000..d284cac --- /dev/null +++ b/Wrappers/Matlab/mex_compile/regularisers_CPU/SB_TV.c @@ -0,0 +1,89 @@ +/* + * This work is part of the Core Imaging Library developed by + * Visual Analytics and Imaging System Group of the Science Technology + * Facilities Council, STFC + * + * Copyright 2017 Daniil Kazantsev + * Copyright 2017 Srikanth Nagella, Edoardo Pasca + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "matrix.h" +#include "mex.h" +#include "SB_TV_core.h" + +/* C-OMP implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1] +* +* Input Parameters: +* 1. Noisy image/volume +* 2. lambda - regularisation parameter +* 3. Number of iterations [OPTIONAL parameter] +* 4. eplsilon - tolerance constant [OPTIONAL parameter] +* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] +* 6. print information: 0 (off) or 1 (on) [OPTIONAL parameter] +* +* Output: +* 1. Filtered/regularized image +* +* This function is based on the Matlab's code and paper by +* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343. +*/ + +void mexFunction( + int nlhs, mxArray *plhs[], + int nrhs, const mxArray *prhs[]) + +{ + int number_of_dims, iter, dimX, dimY, dimZ, methTV, printswitch; + const int *dim_array; + float *Input, *Output=NULL, lambda, epsil; + + number_of_dims = mxGetNumberOfDimensions(prhs[0]); + dim_array = mxGetDimensions(prhs[0]); + + /*Handling Matlab input data*/ + if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter, Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), print switch"); + + Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */ + lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */ + iter = 100; /* default iterations number */ + epsil = 0.0001; /* default tolerance constant */ + methTV = 0; /* default isotropic TV penalty */ + printswitch = 0; /*default print is switched, off - 0 */ + + if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); } + + if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5) || (nrhs == 6)) iter = (int) mxGetScalar(prhs[2]); /* iterations number */ + if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6)) epsil = (float) mxGetScalar(prhs[3]); /* tolerance constant */ + if ((nrhs == 5) || (nrhs == 6)) { + char *penalty_type; + penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */ + if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',"); + if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */ + mxFree(penalty_type); + } + if (nrhs == 6) { + printswitch = (int) mxGetScalar(prhs[5]); + if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0"); + } + + /*Handling Matlab output data*/ + dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; + + if (number_of_dims == 2) { + dimZ = 1; /*2D case*/ + Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + } + if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + + /* running the function */ + SB_TV_CPU_main(Input, Output, lambda, iter, epsil, methTV, printswitch, dimX, dimY, dimZ); +} diff --git a/Wrappers/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp b/Wrappers/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp new file mode 100644 index 0000000..60847d9 --- /dev/null +++ b/Wrappers/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp @@ -0,0 +1,89 @@ +/* + * This work is part of the Core Imaging Library developed by + * Visual Analytics and Imaging System Group of the Science Technology + * Facilities Council, STFC + * + * Copyright 2017 Daniil Kazantsev + * Copyright 2017 Srikanth Nagella, Edoardo Pasca + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "matrix.h" +#include "mex.h" +#include "TV_SB_GPU_core.h" + +/* CUDA mex-file for implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1] +* +* Input Parameters: +* 1. Noisy image/volume +* 2. lambda - regularisation parameter +* 3. Number of iterations [OPTIONAL parameter] +* 4. eplsilon - tolerance constant [OPTIONAL parameter] +* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] +* 6. print information: 0 (off) or 1 (on) [OPTIONAL parameter] +* +* Output: +* 1. Filtered/regularized image +* +* This function is based on the Matlab's code and paper by +* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343. +*/ + +void mexFunction( + int nlhs, mxArray *plhs[], + int nrhs, const mxArray *prhs[]) + +{ + int number_of_dims, iter, dimX, dimY, dimZ, methTV, printswitch; + const int *dim_array; + float *Input, *Output=NULL, lambda, epsil; + + number_of_dims = mxGetNumberOfDimensions(prhs[0]); + dim_array = mxGetDimensions(prhs[0]); + + /*Handling Matlab input data*/ + if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter, Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), print switch"); + + Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */ + lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */ + iter = 100; /* default iterations number */ + epsil = 0.0001; /* default tolerance constant */ + methTV = 0; /* default isotropic TV penalty */ + printswitch = 0; /*default print is switched, off - 0 */ + + if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); } + + if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5) || (nrhs == 6)) iter = (int) mxGetScalar(prhs[2]); /* iterations number */ + if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6)) epsil = (float) mxGetScalar(prhs[3]); /* tolerance constant */ + if ((nrhs == 5) || (nrhs == 6)) { + char *penalty_type; + penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */ + if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',"); + if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */ + mxFree(penalty_type); + } + if (nrhs == 6) { + printswitch = (int) mxGetScalar(prhs[5]); + if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0"); + } + + /*Handling Matlab output data*/ + dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; + + if (number_of_dims == 2) { + dimZ = 1; /*2D case*/ + Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + } + if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + + /* running the function */ + TV_SB_GPU_main(Input, Output, lambda, iter, epsil, methTV, printswitch, dimX, dimY, dimZ); +} diff --git a/Wrappers/Python/ccpi/filters/regularisers.py b/Wrappers/Python/ccpi/filters/regularisers.py index 376cc9c..50c4374 100644 --- a/Wrappers/Python/ccpi/filters/regularisers.py +++ b/Wrappers/Python/ccpi/filters/regularisers.py @@ -2,8 +2,8 @@ script which assigns a proper device core function based on a flag ('cpu' or 'gpu') """ -from ccpi.filters.cpu_regularisers_cython import TV_ROF_CPU, TV_FGP_CPU, dTV_FGP_CPU -from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, dTV_FGP_GPU +from ccpi.filters.cpu_regularisers_cython import TV_ROF_CPU, TV_FGP_CPU, TV_SB_CPU, dTV_FGP_CPU +from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, TV_SB_GPU, dTV_FGP_GPU def ROF_TV(inputData, regularisation_parameter, iterations, time_marching_parameter,device='cpu'): @@ -42,6 +42,25 @@ def FGP_TV(inputData, regularisation_parameter,iterations, else: raise ValueError('Unknown device {0}. Expecting gpu or cpu'\ .format(device)) +def SB_TV(inputData, regularisation_parameter, iterations, + tolerance_param, methodTV, printM, device='cpu'): + if device == 'cpu': + return TV_SB_CPU(inputData, + regularisation_parameter, + iterations, + tolerance_param, + methodTV, + printM) + elif device == 'gpu': + return TV_SB_GPU(inputData, + regularisation_parameter, + iterations, + tolerance_param, + methodTV, + printM) + else: + raise ValueError('Unknown device {0}. Expecting gpu or cpu'\ + .format(device)) def FGP_dTV(inputData, refdata, regularisation_parameter, iterations, tolerance_param, eta_const, methodTV, nonneg, printM, device='cpu'): if device == 'cpu': diff --git a/Wrappers/Python/demos/demo_cpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_regularisers.py index 00beb0b..0e4355b 100644 --- a/Wrappers/Python/demos/demo_cpu_regularisers.py +++ b/Wrappers/Python/demos/demo_cpu_regularisers.py @@ -12,7 +12,7 @@ import matplotlib.pyplot as plt import numpy as np import os import timeit -from ccpi.filters.regularisers import ROF_TV, FGP_TV, FGP_dTV +from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, FGP_dTV from qualitymetrics import rmse ############################################################################### def printParametersToString(pars): @@ -141,13 +141,60 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, imgplot = plt.imshow(fgp_cpu, cmap="gray") plt.title('{}'.format('CPU results')) +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("_______________SB-TV (2D)__________________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + +## plot +fig = plt.figure(3) +plt.suptitle('Performance of SB-TV regulariser using the CPU') +a=fig.add_subplot(1,2,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(u0,cmap="gray") + +# set parameters +pars = {'algorithm' : SB_TV, \ + 'input' : u0,\ + 'regularisation_parameter':0.04, \ + 'number_of_iterations' :150 ,\ + 'tolerance_constant':1e-06,\ + 'methodTV': 0 ,\ + 'printingOut': 0 + } + +print ("#############SB TV CPU####################") +start_time = timeit.default_timer() +sb_cpu = SB_TV(pars['input'], + pars['regularisation_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['methodTV'], + pars['printingOut'],'cpu') + + +rms = rmse(Im, sb_cpu) +pars['rmse'] = rms + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,2,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(sb_cpu, cmap="gray") +plt.title('{}'.format('CPU results')) + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") print ("_____________FGP-dTV (2D)__________________") print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") ## plot -fig = plt.figure(3) +fig = plt.figure(4) plt.suptitle('Performance of FGP-dTV regulariser using the CPU') a=fig.add_subplot(1,2,1) a.set_title('Noisy Image') @@ -223,7 +270,7 @@ print ("_______________ROF-TV (3D)_________________") print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") ## plot -fig = plt.figure(4) +fig = plt.figure(5) plt.suptitle('Performance of ROF-TV regulariser using the CPU') a=fig.add_subplot(1,2,1) a.set_title('Noisy 15th slice of a volume') @@ -263,7 +310,7 @@ print ("_______________FGP-TV (3D)__________________") print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") ## plot -fig = plt.figure(5) +fig = plt.figure(6) plt.suptitle('Performance of FGP-TV regulariser using the CPU') a=fig.add_subplot(1,2,1) a.set_title('Noisy Image') @@ -307,13 +354,59 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, imgplot = plt.imshow(fgp_cpu3D[10,:,:], cmap="gray") plt.title('{}'.format('Recovered volume on the CPU using FGP-TV')) +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("_______________SB-TV (3D)_________________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + +## plot +fig = plt.figure(7) +plt.suptitle('Performance of SB-TV regulariser using the CPU') +a=fig.add_subplot(1,2,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray") + +# set parameters +pars = {'algorithm' : SB_TV, \ + 'input' : noisyVol,\ + 'regularisation_parameter':0.04, \ + 'number_of_iterations' :150 ,\ + 'tolerance_constant':0.00001,\ + 'methodTV': 0 ,\ + 'printingOut': 0 + } + +print ("#############SB TV CPU####################") +start_time = timeit.default_timer() +sb_cpu3D = SB_TV(pars['input'], + pars['regularisation_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['methodTV'], + pars['printingOut'],'cpu') + +rms = rmse(idealVol, sb_cpu3D) +pars['rmse'] = rms + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,2,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(sb_cpu3D[10,:,:], cmap="gray") +plt.title('{}'.format('Recovered volume on the CPU using SB-TV')) + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") print ("_______________FGP-dTV (3D)__________________") print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") ## plot -fig = plt.figure(6) +fig = plt.figure(8) plt.suptitle('Performance of FGP-dTV regulariser using the CPU') a=fig.add_subplot(1,2,1) a.set_title('Noisy Image') diff --git a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py index 310cf75..d8e2da7 100644 --- a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py +++ b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py @@ -12,7 +12,7 @@ import matplotlib.pyplot as plt import numpy as np import os import timeit -from ccpi.filters.regularisers import ROF_TV, FGP_TV, FGP_dTV +from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, FGP_dTV from qualitymetrics import rmse ############################################################################### def printParametersToString(pars): @@ -218,13 +218,99 @@ if (diff_im.sum() > 1): else: print ("Arrays match") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("____________SB-TV bench___________________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + +## plot +fig = plt.figure(3) +plt.suptitle('Comparison of SB-TV regulariser using CPU and GPU implementations') +a=fig.add_subplot(1,4,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(u0,cmap="gray") + +# set parameters +pars = {'algorithm' : SB_TV, \ + 'input' : u0,\ + 'regularisation_parameter':0.04, \ + 'number_of_iterations' :150 ,\ + 'tolerance_constant':1e-05,\ + 'methodTV': 0 ,\ + 'printingOut': 0 + } + +print ("#############SB-TV CPU####################") +start_time = timeit.default_timer() +sb_cpu = SB_TV(pars['input'], + pars['regularisation_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['methodTV'], + pars['printingOut'],'cpu') + + +rms = rmse(Im, sb_cpu) +pars['rmse'] = rms + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,4,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(sb_cpu, cmap="gray") +plt.title('{}'.format('CPU results')) + + +print ("##############SB TV GPU##################") +start_time = timeit.default_timer() +sb_gpu = SB_TV(pars['input'], + pars['regularisation_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['methodTV'], + pars['printingOut'],'gpu') + +rms = rmse(Im, sb_gpu) +pars['rmse'] = rms +pars['algorithm'] = SB_TV +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,4,3) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(sb_gpu, cmap="gray") +plt.title('{}'.format('GPU results')) + +print ("--------Compare the results--------") +tolerance = 1e-05 +diff_im = np.zeros(np.shape(rof_cpu)) +diff_im = abs(sb_cpu - sb_gpu) +diff_im[diff_im > tolerance] = 1 +a=fig.add_subplot(1,4,4) +imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray") +plt.title('{}'.format('Pixels larger threshold difference')) +if (diff_im.sum() > 1): + print ("Arrays do not match!") +else: + print ("Arrays match") + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") print ("____________FGP-dTV bench___________________") print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") ## plot -fig = plt.figure(3) +fig = plt.figure(4) plt.suptitle('Comparison of FGP-dTV regulariser using CPU and GPU implementations') a=fig.add_subplot(1,4,1) a.set_title('Noisy Image') diff --git a/Wrappers/Python/demos/demo_gpu_regularisers.py b/Wrappers/Python/demos/demo_gpu_regularisers.py index 24a3c88..25d8d85 100644 --- a/Wrappers/Python/demos/demo_gpu_regularisers.py +++ b/Wrappers/Python/demos/demo_gpu_regularisers.py @@ -12,7 +12,7 @@ import matplotlib.pyplot as plt import numpy as np import os import timeit -from ccpi.filters.regularisers import ROF_TV, FGP_TV, FGP_dTV +from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, FGP_dTV from qualitymetrics import rmse ############################################################################### def printParametersToString(pars): @@ -139,12 +139,59 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, imgplot = plt.imshow(fgp_gpu, cmap="gray") plt.title('{}'.format('GPU results')) + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") -print ("____________FGP-dTV bench___________________") +print ("____________SB-TV bench___________________") print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") ## plot fig = plt.figure(3) +plt.suptitle('Performance of the SB-TV regulariser using the GPU') +a=fig.add_subplot(1,2,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(u0,cmap="gray") + +# set parameters +pars = {'algorithm' : SB_TV, \ + 'input' : u0,\ + 'regularisation_parameter':0.04, \ + 'number_of_iterations' :150 ,\ + 'tolerance_constant':1e-06,\ + 'methodTV': 0 ,\ + 'printingOut': 0 + } + +print ("##############SB TV GPU##################") +start_time = timeit.default_timer() +sb_gpu = SB_TV(pars['input'], + pars['regularisation_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['methodTV'], + pars['printingOut'],'gpu') + +rms = rmse(Im, sb_gpu) +pars['rmse'] = rms +pars['algorithm'] = SB_TV +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,2,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(sb_gpu, cmap="gray") +plt.title('{}'.format('GPU results')) + +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("____________FGP-dTV bench___________________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + +## plot +fig = plt.figure(4) plt.suptitle('Performance of the FGP-dTV regulariser using the GPU') a=fig.add_subplot(1,2,1) a.set_title('Noisy Image') @@ -219,7 +266,7 @@ print ("_______________ROF-TV (3D)_________________") print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") ## plot -fig = plt.figure(4) +fig = plt.figure(5) plt.suptitle('Performance of ROF-TV regulariser using the GPU') a=fig.add_subplot(1,2,1) a.set_title('Noisy 15th slice of a volume') @@ -259,7 +306,7 @@ print ("_______________FGP-TV (3D)__________________") print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") ## plot -fig = plt.figure(5) +fig = plt.figure(6) plt.suptitle('Performance of FGP-TV regulariser using the GPU') a=fig.add_subplot(1,2,1) a.set_title('Noisy Image') @@ -302,13 +349,58 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, imgplot = plt.imshow(fgp_gpu3D[10,:,:], cmap="gray") plt.title('{}'.format('Recovered volume on the GPU using FGP-TV')) +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("_______________SB-TV (3D)__________________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + +## plot +fig = plt.figure(7) +plt.suptitle('Performance of SB-TV regulariser using the GPU') +a=fig.add_subplot(1,2,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray") + +# set parameters +pars = {'algorithm' : SB_TV, \ + 'input' : noisyVol,\ + 'regularisation_parameter':0.04, \ + 'number_of_iterations' :100 ,\ + 'tolerance_constant':1e-05,\ + 'methodTV': 0 ,\ + 'printingOut': 0 + } + +print ("#############SB TV GPU####################") +start_time = timeit.default_timer() +sb_gpu3D = SB_TV(pars['input'], + pars['regularisation_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['methodTV'], + pars['printingOut'],'gpu') + +rms = rmse(idealVol, sb_gpu3D) +pars['rmse'] = rms + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,2,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(sb_gpu3D[10,:,:], cmap="gray") +plt.title('{}'.format('Recovered volume on the GPU using SB-TV')) print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") print ("_______________FGP-dTV (3D)________________") print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") ## plot -fig = plt.figure(6) +fig = plt.figure(8) plt.suptitle('Performance of FGP-dTV regulariser using the GPU') a=fig.add_subplot(1,2,1) a.set_title('Noisy Image') diff --git a/Wrappers/Python/setup-regularisers.py.in b/Wrappers/Python/setup-regularisers.py.in index c7ebb5c..0681cc4 100644 --- a/Wrappers/Python/setup-regularisers.py.in +++ b/Wrappers/Python/setup-regularisers.py.in @@ -36,6 +36,7 @@ extra_include_dirs += [os.path.join(".." , ".." , "Core"), os.path.join(".." , ".." , "Core", "regularisers_CPU"), os.path.join(".." , ".." , "Core", "regularisers_GPU" , "TV_FGP" ) , os.path.join(".." , ".." , "Core", "regularisers_GPU" , "TV_ROF" ) , + os.path.join(".." , ".." , "Core", "regularisers_GPU" , "TV_SB" ) , os.path.join(".." , ".." , "Core", "regularisers_GPU" , "dTV_FGP" ) , "."] diff --git a/Wrappers/Python/src/cpu_regularisers.pyx b/Wrappers/Python/src/cpu_regularisers.pyx index 1661375..417670d 100644 --- a/Wrappers/Python/src/cpu_regularisers.pyx +++ b/Wrappers/Python/src/cpu_regularisers.pyx @@ -20,6 +20,7 @@ cimport numpy as np cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ); cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); +cdef extern float SB_TV_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ); cdef extern float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); @@ -125,6 +126,63 @@ def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, printM, dims[2], dims[1], dims[0]) return outputData + +#***************************************************************# +#********************** Total-variation SB *********************# +#***************************************************************# +#*************** Total-variation Split Bregman (SB)*************# +def TV_SB_CPU(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, printM): + if inputData.ndim == 2: + return TV_SB_2D(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, printM) + elif inputData.ndim == 3: + return TV_SB_3D(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, printM) + +def TV_SB_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, + float regularisation_parameter, + int iterationsNumb, + float tolerance_param, + int methodTV, + int printM): + + cdef long dims[2] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \ + np.zeros([dims[0],dims[1]], dtype='float32') + + #/* Run SB-TV iterations for 2D data */ + SB_TV_CPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, + iterationsNumb, + tolerance_param, + methodTV, + printM, + dims[0], dims[1], 1) + + return outputData + +def TV_SB_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, + float regularisation_parameter, + int iterationsNumb, + float tolerance_param, + int methodTV, + int printM): + cdef long dims[3] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + dims[2] = inputData.shape[2] + + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ + np.zeros([dims[0], dims[1], dims[2]], dtype='float32') + + #/* Run SB-TV iterations for 3D data */ + SB_TV_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, + iterationsNumb, + tolerance_param, + methodTV, + printM, + dims[2], dims[1], dims[0]) + return outputData #****************************************************************# #**************Directional Total-variation FGP ******************# #****************************************************************# diff --git a/Wrappers/Python/src/gpu_regularisers.pyx b/Wrappers/Python/src/gpu_regularisers.pyx index 18efdcd..36eec95 100644 --- a/Wrappers/Python/src/gpu_regularisers.pyx +++ b/Wrappers/Python/src/gpu_regularisers.pyx @@ -20,6 +20,7 @@ cimport numpy as np cdef extern void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); cdef extern void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); +cdef extern void TV_SB_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int printM, int N, int M, int Z); cdef extern void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z); # Total-variation Rudin-Osher-Fatemi (ROF) @@ -62,6 +63,27 @@ def TV_FGP_GPU(inputData, methodTV, nonneg, printM) +# Total-variation Split Bregman (SB) +def TV_SB_GPU(inputData, + regularisation_parameter, + iterations, + tolerance_param, + methodTV, + printM): + if inputData.ndim == 2: + return SBTV2D(inputData, + regularisation_parameter, + iterations, + tolerance_param, + methodTV, + printM) + elif inputData.ndim == 3: + return SBTV3D(inputData, + regularisation_parameter, + iterations, + tolerance_param, + methodTV, + printM) # Directional Total-variation Fast-Gradient-Projection (FGP) def dTV_FGP_GPU(inputData, refdata, @@ -197,7 +219,60 @@ def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, dims[2], dims[1], dims[0]); return outputData +#***************************************************************# +#********************** Total-variation SB *********************# +#***************************************************************# +#*************** Total-variation Split Bregman (SB)*************# +def SBTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, + float regularisation_parameter, + int iterations, + float tolerance_param, + int methodTV, + int printM): + + cdef long dims[2] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \ + np.zeros([dims[0],dims[1]], dtype='float32') + + # Running CUDA code here + TV_SB_GPU_main(&inputData[0,0], &outputData[0,0], + regularisation_parameter, + iterations, + tolerance_param, + methodTV, + printM, + dims[0], dims[1], 1); + + return outputData +def SBTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, + float regularisation_parameter, + int iterations, + float tolerance_param, + int methodTV, + int printM): + + cdef long dims[3] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + dims[2] = inputData.shape[2] + + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ + np.zeros([dims[0],dims[1],dims[2]], dtype='float32') + + # Running CUDA code here + TV_SB_GPU_main(&inputData[0,0,0], &outputData[0,0,0], + regularisation_parameter , + iterations, + tolerance_param, + methodTV, + printM, + dims[2], dims[1], dims[0]); + + return outputData #****************************************************************# #**************Directional Total-variation FGP ******************# #****************************************************************# |