summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDaniil Kazantsev <dkazanc@hotmail.com>2019-04-17 12:03:27 +0100
committerDaniil Kazantsev <dkazanc@hotmail.com>2019-04-17 12:03:27 +0100
commitbfe917daf4a5d6ce17f1b5d2497a0f7a345f88a5 (patch)
tree486f00b26b2af9ef2c7a75bb8e126480b5a5bc97 /src
parent9c57162fef5822369c09b7b36b3637dae5e397c0 (diff)
downloadregularization-bfe917daf4a5d6ce17f1b5d2497a0f7a345f88a5.tar.gz
regularization-bfe917daf4a5d6ce17f1b5d2497a0f7a345f88a5.tar.bz2
regularization-bfe917daf4a5d6ce17f1b5d2497a0f7a345f88a5.tar.xz
regularization-bfe917daf4a5d6ce17f1b5d2497a0f7a345f88a5.zip
var reg for GPU, ROF TV model
Diffstat (limited to 'src')
-rw-r--r--src/Core/regularisers_CPU/ROF_TV_core.c5
-rw-r--r--src/Core/regularisers_CPU/ROF_TV_core.h12
-rwxr-xr-xsrc/Core/regularisers_GPU/TV_ROF_GPU_core.cu81
-rwxr-xr-xsrc/Core/regularisers_GPU/TV_ROF_GPU_core.h4
-rw-r--r--src/Python/src/cpu_regularisers.pyx8
-rw-r--r--src/Python/src/gpu_regularisers.pyx62
6 files changed, 100 insertions, 72 deletions
diff --git a/src/Core/regularisers_CPU/ROF_TV_core.c b/src/Core/regularisers_CPU/ROF_TV_core.c
index 955cabc..6951268 100644
--- a/src/Core/regularisers_CPU/ROF_TV_core.c
+++ b/src/Core/regularisers_CPU/ROF_TV_core.c
@@ -31,16 +31,15 @@ int sign(float x) {
/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
*
- *
* Input Parameters:
* 1. Noisy image/volume [REQUIRED]
- * 2. lambda - regularization parameter [REQUIRED]
+ * 2. lambda - regularization parameter (a constant or the same size as input (1))
* 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
* 5. eplsilon: tolerance constant
*
* Output:
- * [1] Regularized image/volume
+ * [1] Regularised image/volume
* [2] Information vector which contains [iteration no., reached tolerance]
*
* This function is based on the paper by
diff --git a/src/Core/regularisers_CPU/ROF_TV_core.h b/src/Core/regularisers_CPU/ROF_TV_core.h
index 8b5e2e4..7e29ddb 100644
--- a/src/Core/regularisers_CPU/ROF_TV_core.h
+++ b/src/Core/regularisers_CPU/ROF_TV_core.h
@@ -27,29 +27,25 @@ limitations under the License.
/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
*
- *
* Input Parameters:
* 1. Noisy image/volume [REQUIRED]
- * 2. lambda - regularization parameter [REQUIRED]
+ * 2. lambda - regularization parameter (a constant or the same size as input (1))
* 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
- * 5. eplsilon: tolerance constant
+ * 5. eplsilon: tolerance constant
*
* Output:
- * [1] Regularized image/volume
+ * [1] Regularised image/volume
* [2] Information vector which contains [iteration no., reached tolerance]
*
* This function is based on the paper by
* [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
- *
- * D. Kazantsev, 2016-18
*/
-
+
#ifdef __cplusplus
extern "C" {
#endif
CCPI_EXPORT float TV_ROF_CPU_main(float *Input, float *Output, float *infovector, float *lambdaPar, int lambda_is_arr, int iterationsNumb, float tau, float epsil, int dimX, int dimY, int dimZ);
-
CCPI_EXPORT float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float *lambda, int lambda_is_arr, float tau, long dimX, long dimY, long dimZ);
CCPI_EXPORT float D1_func(float *A, float *D1, long dimX, long dimY, long dimZ);
CCPI_EXPORT float D2_func(float *A, float *D2, long dimX, long dimY, long dimZ);
diff --git a/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu
index 193cf53..c6c6e88 100755
--- a/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu
+++ b/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu
@@ -27,17 +27,16 @@ limitations under the License.
*
* Input Parameters:
* 1. Noisy image/volume [REQUIRED]
-* 2. lambda - regularization parameter [REQUIRED]
-* 3. tau - marching step for explicit scheme, ~0.1 is recommended [REQUIRED]
-* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
+* 2. lambda - regularization parameter (a constant or the same size as input (1))
+* 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
+* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
* 5. eplsilon: tolerance constant
-
- * Output:
- * [1] Filtered/regularized image/volume
- * [2] Information vector which contains [iteration no., reached tolerance]
-
-
- * This function is based on the paper by
+*
+* Output:
+* [1] Regularised image/volume
+* [2] Information vector which contains [iteration no., reached tolerance]
+*
+* This function is based on the paper by
* [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
*/
@@ -121,27 +120,25 @@ __host__ __device__ int sign (float x)
}
}
- __global__ void TV_kernel2D(float *D1, float *D2, float *Update, float *Input, float lambda, float tau, int N, int M)
+ __global__ void TV_kernel2D(float *D1, float *D2, float *Update, float *Input, float *lambdaPar_d, int lambda_is_arr, float tau, int N, int M)
{
int i2, j2;
- float dv1,dv2;
+ float dv1,dv2,lambda_val;
int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
- int index = i + N*j;
+ int index = i + N*j;
+ lambda_val = *(lambdaPar_d + index* lambda_is_arr);
if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) {
-
/* boundary conditions (Neumann reflections) */
i2 = i - 1; if (i2 < 0) i2 = i+1;
j2 = j - 1; if (j2 < 0) j2 = j+1;
-
/* divergence components */
dv1 = D1[index] - D1[j2*N + i];
dv2 = D2[index] - D2[j*N + i2];
- Update[index] += tau*(lambda*(dv1 + dv2) - (Update[index] - Input[index]));
-
+ Update[index] += tau*(lambda_val*(dv1 + dv2) - (Update[index] - Input[index]));
}
}
/*********************3D case****************************/
@@ -265,19 +262,20 @@ __host__ __device__ int sign (float x)
}
}
- __global__ void TV_kernel3D(float *D1, float *D2, float *D3, float *Update, float *Input, float lambda, float tau, int dimX, int dimY, int dimZ)
+ __global__ void TV_kernel3D(float *D1, float *D2, float *D3, float *Update, float *Input, float *lambda, int lambda_is_arr, float tau, int dimX, int dimY, int dimZ)
{
- float dv1, dv2, dv3;
+ float dv1, dv2, dv3, lambda_val;
int i1,i2,k1,j1,j2,k2;
int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
- int k = blockDim.z * blockIdx.z + threadIdx.z;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
- int index = (dimX*dimY)*k + j*dimX+i;
+ int index = (dimX*dimY)*k + j*dimX+i;
+ lambda_val = *(lambda + index* lambda_is_arr);
- if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
- /* symmetric boundary conditions (Neuman) */
+ /* symmetric boundary conditions (Neuman) */
i1 = i + 1; if (i1 >= dimX) i1 = i-1;
i2 = i - 1; if (i2 < 0) i2 = i+1;
j1 = j + 1; if (j1 >= dimY) j1 = j-1;
@@ -290,7 +288,7 @@ __host__ __device__ int sign (float x)
dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2];
dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i];
- Update[index] += tau*(lambda*(dv1 + dv2 + dv3) - (Update[index] - Input[index]));
+ Update[index] += tau*(lambda_val*(dv1 + dv2 + dv3) - (Update[index] - Input[index]));
}
}
@@ -348,7 +346,7 @@ __global__ void ROFResidCalc3D_kernel(float *Input1, float *Input2, float* Outpu
/////////////////////////////////////////////////
///////////////// HOST FUNCTION /////////////////
-extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float lambdaPar, int iter, float tau, float epsil, int N, int M, int Z)
+extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float *lambdaPar, int lambda_is_arr, int iter, float tau, float epsil, int N, int M, int Z)
{
int deviceCount = -1; // number of devices
cudaGetDeviceCount(&deviceCount);
@@ -360,10 +358,10 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f
re = 0.0f;
int ImSize, count, n;
count = 0; n = 0;
- float *d_input, *d_update, *d_D1, *d_D2, *d_update_prev=NULL;
+ float *d_input, *d_update, *d_D1, *d_D2, *d_update_prev=NULL, *lambdaPar_d=NULL;
if (Z == 0) Z = 1;
- ImSize = N*M*Z;
+ ImSize = N*M*Z;
CHECK(cudaMalloc((void**)&d_input,ImSize*sizeof(float)));
CHECK(cudaMalloc((void**)&d_update,ImSize*sizeof(float)));
CHECK(cudaMalloc((void**)&d_D1,ImSize*sizeof(float)));
@@ -373,6 +371,16 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f
CHECK(cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
CHECK(cudaMemcpy(d_update,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+ /*dealing with spatially variant reglariser */
+ if (lambda_is_arr == 1) {
+ CHECK(cudaMalloc((void**)&lambdaPar_d,ImSize*sizeof(float)));
+ CHECK(cudaMemcpy(lambdaPar_d,lambdaPar,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+ }
+ else {
+ CHECK(cudaMalloc((void**)&lambdaPar_d,1*sizeof(float)));
+ CHECK(cudaMemcpy(lambdaPar_d,lambdaPar,1*sizeof(float),cudaMemcpyHostToDevice));
+ }
+
if (Z == 1) {
// TV - 2D case
dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
@@ -391,7 +399,7 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f
D2_func2D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M);
CHECK(cudaDeviceSynchronize());
/*running main kernel*/
- TV_kernel2D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_update, d_input, lambdaPar, tau, N, M);
+ TV_kernel2D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_update, d_input, lambdaPar_d, lambda_is_arr, tau, N, M);
CHECK(cudaDeviceSynchronize());
if ((epsil != 0.0f) && (n % 5 == 0)) {
@@ -417,7 +425,7 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f
}
}
else {
- // TV - 3D case
+ // TV - 3D case
dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE));
@@ -431,7 +439,6 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
}
-
/* calculate differences */
D1_func3D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M, Z);
CHECK(cudaDeviceSynchronize());
@@ -440,7 +447,7 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f
D3_func3D<<<dimGrid,dimBlock>>>(d_update, d_D3, N, M, Z);
CHECK(cudaDeviceSynchronize());
/*running main kernel*/
- TV_kernel3D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_D3, d_update, d_input, lambdaPar, tau, N, M, Z);
+ TV_kernel3D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_D3, d_update, d_input, lambdaPar_d, lambda_is_arr, tau, N, M, Z);
CHECK(cudaDeviceSynchronize());
if ((epsil != 0.0f) && (n % 5 == 0)) {
@@ -461,12 +468,8 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f
re = (reduction/reduction2);
if (re < epsil) count++;
if (count > 3) break;
-
- }
-
-
+ }
}
-
CHECK(cudaFree(d_D3));
}
CHECK(cudaMemcpy(Output,d_update,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost));
@@ -475,9 +478,9 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f
CHECK(cudaFree(d_update));
CHECK(cudaFree(d_D1));
CHECK(cudaFree(d_D2));
+ CHECK(cudaFree(lambdaPar_d));
infovector[0] = (float)(n); /*iterations number (if stopped earlier based on tolerance)*/
infovector[1] = re; /* reached tolerance */
-
return 0;
}
diff --git a/src/Core/regularisers_GPU/TV_ROF_GPU_core.h b/src/Core/regularisers_GPU/TV_ROF_GPU_core.h
index 0a75124..30514ad 100755
--- a/src/Core/regularisers_GPU/TV_ROF_GPU_core.h
+++ b/src/Core/regularisers_GPU/TV_ROF_GPU_core.h
@@ -3,6 +3,6 @@
#include "CCPiDefines.h"
#include <stdio.h>
-extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float lambdaPar, int iter, float tau, float epsil, int N, int M, int Z);
+extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float *lambdaPar, int lambda_is_arr, int iter, float tau, float epsil, int N, int M, int Z);
-#endif
+#endif
diff --git a/src/Python/src/cpu_regularisers.pyx b/src/Python/src/cpu_regularisers.pyx
index a2c4c32..9855eca 100644
--- a/src/Python/src/cpu_regularisers.pyx
+++ b/src/Python/src/cpu_regularisers.pyx
@@ -18,7 +18,7 @@ import cython
import numpy as np
cimport numpy as np
-cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float *infovector, float *lambdaPar, int arrayscal, int iterationsNumb, float tau, float epsil, int dimX, int dimY, int dimZ);
+cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float *infovector, float *lambdaPar, int lambda_is_arr, int iterationsNumb, float tau, float epsil, int dimX, int dimY, int dimZ);
cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float *infovector, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int dimX, int dimY, int dimZ);
cdef extern float SB_TV_CPU_main(float *Input, float *Output, float *infovector, float mu, int iter, float epsil, int methodTV, int dimX, int dimY, int dimZ);
cdef extern float LLT_ROF_CPU_main(float *Input, float *Output, float *infovector, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, float epsil, int dimX, int dimY, int dimZ);
@@ -721,20 +721,20 @@ def MASK_CORR_CPU_2D(np.ndarray[np.uint8_t, ndim=2, mode="c"] maskData,
np.ndarray[np.uint8_t, ndim=1, mode="c"] select_classes,
int total_classesNum,
int CorrectionWindow):
-
+
cdef long dims[2]
dims[0] = maskData.shape[0]
dims[1] = maskData.shape[1]
select_classes_length = select_classes.shape[0]
-
+
cdef np.ndarray[np.uint8_t, ndim=2, mode="c"] mask_upd = \
np.zeros([dims[0],dims[1]], dtype='uint8')
cdef np.ndarray[np.uint8_t, ndim=2, mode="c"] corr_regions = \
np.zeros([dims[0],dims[1]], dtype='uint8')
# Run the function to process given MASK
- Mask_merge_main(&maskData[0,0], &mask_upd[0,0], &corr_regions[0,0], &select_classes[0], select_classes_length,
+ Mask_merge_main(&maskData[0,0], &mask_upd[0,0], &corr_regions[0,0], &select_classes[0], select_classes_length,
total_classesNum, CorrectionWindow, dims[1], dims[0], 1)
return (mask_upd,corr_regions)
diff --git a/src/Python/src/gpu_regularisers.pyx b/src/Python/src/gpu_regularisers.pyx
index 84ee981..8cd8c93 100644
--- a/src/Python/src/gpu_regularisers.pyx
+++ b/src/Python/src/gpu_regularisers.pyx
@@ -20,7 +20,7 @@ cimport numpy as np
CUDAErrorMessage = 'CUDA error'
-cdef extern int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float lambdaPar, int iter, float tau, float epsil, int N, int M, int Z);
+cdef extern int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float *lambdaPar, int lambda_is_arr, int iter, float tau, float epsil, int N, int M, int Z);
cdef extern int TV_FGP_GPU_main(float *Input, float *Output, float *infovector, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int N, int M, int Z);
cdef extern int TV_SB_GPU_main(float *Input, float *Output, float *infovector, float lambdaPar, int iter, float epsil, int methodTV, int N, int M, int Z);
cdef extern int LLT_ROF_GPU_main(float *Input, float *Output, float *infovector, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, float epsil, int N, int M, int Z);
@@ -177,7 +177,7 @@ def Diff4th_GPU(inputData,
#********************** Total-variation ROF *********************#
#****************************************************************#
def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
- float regularisation_parameter,
+ regularisation_parameter,
int iterations,
float time_marching_parameter,
float tolerance_param):
@@ -185,26 +185,39 @@ def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
cdef long dims[2]
dims[0] = inputData.shape[0]
dims[1] = inputData.shape[1]
-
+ cdef float lambdareg
+ cdef np.ndarray[np.float32_t, ndim=2, mode="c"] reg
cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
np.zeros([dims[0],dims[1]], dtype='float32')
cdef np.ndarray[np.float32_t, ndim=1, mode="c"] infovec = \
np.ones([2], dtype='float32')
- # Running CUDA code here
- if (TV_ROF_GPU_main(
- &inputData[0,0], &outputData[0,0], &infovec[0],
- regularisation_parameter,
+ if isinstance (regularisation_parameter, np.ndarray):
+ reg = regularisation_parameter.copy()
+ # Running CUDA code here
+ if (TV_ROF_GPU_main(&inputData[0,0], &outputData[0,0], &infovec[0],
+ &reg[0,0], 1,
iterations,
time_marching_parameter,
tolerance_param,
dims[1], dims[0], 1)==0):
- return (outputData,infovec)
+ return (outputData,infovec)
+ else:
+ raise ValueError(CUDAErrorMessage);
else:
- raise ValueError(CUDAErrorMessage);
+ lambdareg = regularisation_parameter
+ if (TV_ROF_GPU_main(&inputData[0,0], &outputData[0,0], &infovec[0],
+ &lambdareg, 0,
+ iterations,
+ time_marching_parameter,
+ tolerance_param,
+ dims[1], dims[0], 1)==0):
+ return (outputData,infovec)
+ else:
+ raise ValueError(CUDAErrorMessage);
def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
- float regularisation_parameter,
+ regularisation_parameter,
int iterations,
float time_marching_parameter,
float tolerance_param):
@@ -213,23 +226,40 @@ def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
dims[0] = inputData.shape[0]
dims[1] = inputData.shape[1]
dims[2] = inputData.shape[2]
-
+ cdef float lambdareg
+ cdef np.ndarray[np.float32_t, ndim=3, mode="c"] reg
cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
cdef np.ndarray[np.float32_t, ndim=1, mode="c"] infovec = \
np.ones([2], dtype='float32')
- # Running CUDA code here
- if (TV_ROF_GPU_main(
+ if isinstance (regularisation_parameter, np.ndarray):
+ reg = regularisation_parameter.copy()
+ # Running CUDA code here
+ if (TV_ROF_GPU_main(
&inputData[0,0,0], &outputData[0,0,0], &infovec[0],
- regularisation_parameter,
+ &reg[0,0,0], 1,
iterations,
time_marching_parameter,
tolerance_param,
dims[2], dims[1], dims[0])==0):
- return (outputData,infovec)
+ return (outputData,infovec)
+ else:
+ raise ValueError(CUDAErrorMessage);
else:
- raise ValueError(CUDAErrorMessage);
+ lambdareg = regularisation_parameter
+ # Running CUDA code here
+ if (TV_ROF_GPU_main(
+ &inputData[0,0,0], &outputData[0,0,0], &infovec[0],
+ &lambdareg, 0,
+ iterations,
+ time_marching_parameter,
+ tolerance_param,
+ dims[2], dims[1], dims[0])==0):
+ return (outputData,infovec)
+ else:
+ raise ValueError(CUDAErrorMessage);
+
#****************************************************************#
#********************** Total-variation FGP *********************#
#****************************************************************#