summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Core/regularisers_CPU/TGV_core.c40
-rw-r--r--Core/regularisers_CPU/TGV_core.h2
-rw-r--r--Core/regularisers_GPU/TGV_GPU_core.cu455
-rw-r--r--Core/regularisers_GPU/TGV_GPU_core.h2
-rw-r--r--Readme.md4
-rwxr-xr-xWrappers/Python/conda-recipe/run_test.py2
-rw-r--r--Wrappers/Python/demos/demo_cpu_regularisers.py4
-rw-r--r--Wrappers/Python/demos/demo_cpu_regularisers3D.py54
-rw-r--r--Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py4
-rw-r--r--Wrappers/Python/demos/demo_gpu_regularisers.py4
-rw-r--r--Wrappers/Python/demos/demo_gpu_regularisers3D.py51
-rw-r--r--Wrappers/Python/src/cpu_regularisers.pyx35
-rw-r--r--Wrappers/Python/src/gpu_regularisers.pyx39
13 files changed, 583 insertions, 113 deletions
diff --git a/Core/regularisers_CPU/TGV_core.c b/Core/regularisers_CPU/TGV_core.c
index ef9ef98..805c3d4 100644
--- a/Core/regularisers_CPU/TGV_core.c
+++ b/Core/regularisers_CPU/TGV_core.c
@@ -139,18 +139,13 @@ float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, in
newU3D(U, U_old, (long)(dimX), (long)(dimY), (long)(dimZ));
/*saving V into V_old*/
- copyIm(V1, V1_old, (long)(dimX), (long)(dimY), (long)(dimZ));
- copyIm(V2, V2_old, (long)(dimX), (long)(dimY), (long)(dimZ));
- copyIm(V3, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ));
+ copyIm_3Ar(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ));
/* upd V*/
UpdV_3D(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), tau);
/*get new V*/
- newU3D(V1, V1_old, (long)(dimX), (long)(dimY), (long)(dimZ));
- newU3D(V2, V2_old, (long)(dimX), (long)(dimY), (long)(dimZ));
- newU3D(V3, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ));
-
+ newU3D_3Ar(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ));
} /*end of iterations*/
free(P3);free(Q4);free(Q5);free(Q6);free(V3);free(V3_old);
}
@@ -454,14 +449,39 @@ float UpdV_3D(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3,
}}}
return 1;
}
+
+float copyIm_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ)
+{
+ long j;
+#pragma omp parallel for shared(V1, V2, V3, V1_old, V2_old, V3_old) private(j)
+ for (j = 0; j<dimX*dimY*dimZ; j++) {
+ V1_old[j] = V1[j];
+ V2_old[j] = V2[j];
+ V3_old[j] = V3[j];
+ }
+ return 1;
+}
+
/*get updated solution U*/
float newU3D(float *U, float *U_old, long dimX, long dimY, long dimZ)
{
long i;
-#pragma omp parallel for shared(U,U_old) private(i)
+#pragma omp parallel for shared(U, U_old) private(i)
+ for(i=0; i<dimX*dimY*dimZ; i++) U[i] = 2.0f*U[i] - U_old[i];
+ return *U;
+}
+
+
+/*get updated solution U*/
+float newU3D_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ)
+{
+ long i;
+#pragma omp parallel for shared(V1, V2, V3, V1_old, V2_old, V3_old) private(i)
for(i=0; i<dimX*dimY*dimZ; i++) {
- U[i] = 2.0f*U[i] - U_old[i];
+ V1[i] = 2.0f*V1[i] - V1_old[i];
+ V2[i] = 2.0f*V2[i] - V2_old[i];
+ V3[i] = 2.0f*V3[i] - V3_old[i];
}
- return *U;
+ return 1;
}
diff --git a/Core/regularisers_CPU/TGV_core.h b/Core/regularisers_CPU/TGV_core.h
index 8ff084e..11b12c1 100644
--- a/Core/regularisers_CPU/TGV_core.h
+++ b/Core/regularisers_CPU/TGV_core.h
@@ -66,6 +66,8 @@ CCPI_EXPORT float ProjQ_3D(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5
CCPI_EXPORT float DivProjP_3D(float *U, float *U0, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float lambda, float tau);
CCPI_EXPORT float UpdV_3D(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float tau);
CCPI_EXPORT float newU3D(float *U, float *U_old, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float copyIm_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float newU3D_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ);
#ifdef __cplusplus
}
#endif
diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu
index 73232a9..58b2c41 100644
--- a/Core/regularisers_GPU/TGV_GPU_core.cu
+++ b/Core/regularisers_GPU/TGV_GPU_core.cu
@@ -21,10 +21,10 @@ limitations under the License.
#include "shared.h"
/* CUDA implementation of Primal-Dual denoising method for
- * Total Generilized Variation (TGV)-L2 model [1] (2D case only)
+ * Total Generilized Variation (TGV)-L2 model [1] (2D/3D case)
*
* Input Parameters:
- * 1. Noisy image (2D)
+ * 1. Noisy image/volume (2D/3D)
* 2. lambda - regularisation parameter
* 3. parameter to control the first-order term (alpha1)
* 4. parameter to control the second-order term (alpha0)
@@ -38,6 +38,10 @@ limitations under the License.
* [1] K. Bredies "Total Generalized Variation"
*/
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+
#define BLKXSIZE2D 16
#define BLKYSIZE2D 16
#define EPS 1.0e-7
@@ -48,9 +52,8 @@ limitations under the License.
/***************************2D Functions*****************************/
/********************************************************************/
__global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, float sigma)
-{
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
+{
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
int index = i + dimX*j;
@@ -67,9 +70,9 @@ __global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float
__global__ void ProjP_2D_kernel(float *P1, float *P2, int dimX, int dimY, float alpha1)
{
- float grad_magn;
+ float grad_magn;
- int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
int index = i + dimX*j;
@@ -78,7 +81,7 @@ __global__ void ProjP_2D_kernel(float *P1, float *P2, int dimX, int dimY, float
grad_magn = sqrt(pow(P1[index],2) + pow(P2[index],2));
grad_magn = grad_magn/alpha1;
- if (grad_magn > 1.0) {
+ if (grad_magn > 1.0f) {
P1[index] /= grad_magn;
P2[index] /= grad_magn;
}
@@ -90,64 +93,56 @@ __global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, floa
{
float q1, q2, q11, q22;
- int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
int index = i + dimX*j;
- if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
-
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
/* symmetric boundary conditions (Neuman) */
- if (i == dimX-1)
- { q1 = (V1[j*dimX+(i-1)] - V1[index]);
- q11 = (V2[j*dimX+(i-1)] - V2[index]);
+ q1 = 0.0f; q11 = 0.0f; q2 = 0.0f; q22 = 0.0f;
+ /* boundary conditions (Neuman) */
+ if (i != dimX-1){
+ q1 = V1[j*dimX+(i+1)] - V1[index];
+ q11 = V2[j*dimX+(i+1)] - V2[index];
}
- else {
- q1 = (V1[j*dimX+(i+1)] - V1[index]);
- q11 = (V2[j*dimX+(i+1)] - V2[index]);
- }
- if (j == dimY-1) {
- q2 = (V2[(j-1)*dimX+i] - V2[index]);
- q22 = (V1[(j-1)*dimX+i] - V1[index]);
- }
- else {
+ if (j != dimY-1) {
q2 = V2[(j+1)*dimX+i] - V2[index];
q22 = V1[(j+1)*dimX+i] - V1[index];
}
Q1[index] += sigma*(q1);
Q2[index] += sigma*(q2);
Q3[index] += sigma*(0.5f*(q11 + q22));
- }
+ }
return;
}
__global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, int dimX, int dimY, float alpha0)
{
- float grad_magn;
+ float grad_magn;
- int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
int index = i + dimX*j;
- if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
-
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
grad_magn = sqrt(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2));
grad_magn = grad_magn/alpha0;
- if (grad_magn > 1.0) {
+ if (grad_magn > 1.0f) {
Q1[index] /= grad_magn;
Q2[index] /= grad_magn;
Q3[index] /= grad_magn;
- }
- }
+ }
+ }
return;
}
__global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, int dimX, int dimY, float lambda, float tau)
{
- float P_v1, P_v2, div;
+ float P_v1, P_v2, div;
- int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
int index = i + dimX*j;
@@ -166,49 +161,54 @@ __global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, in
__global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float tau)
{
- float q1, q11, q2, q22, div1, div2;
+ float q1, q3_x, q2, q3_y, div1, div2;
- int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
int index = i + dimX*j;
- if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
-
- /* symmetric boundary conditions (Neuman) */
- if (i == 0) {
- q1 = Q1[index];
- q11 = Q3[index];
- }
- else {
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
+ q2 = 0.0f; q3_y = 0.0f; q1 = 0.0f; q3_x = 0.0;
+ /* boundary conditions (Neuman) */
+ if (i != 0) {
q1 = Q1[index] - Q1[j*dimX+(i-1)];
- q11 = Q3[index] - Q3[j*dimX+(i-1)];
- }
- if (j == 0) {
- q2 = Q2[index];
- q22 = Q3[index];
+ q3_x = Q3[index] - Q3[j*dimX+(i-1)];
}
- else {
+ if (j != 0) {
q2 = Q2[index] - Q2[(j-1)*dimX+i];
- q22 = Q3[index] - Q3[(j-1)*dimX+i];
+ q3_y = Q3[index] - Q3[(j-1)*dimX+i];
}
- div1 = q1 + q22;
- div2 = q2 + q11;
+ div1 = q1 + q3_y;
+ div2 = q3_x + q2;
V1[index] += tau*(P1[index] + div1);
V2[index] += tau*(P2[index] + div2);
- }
+ }
return;
}
-__global__ void copyIm_TGV_kernel(float *Input, float* Output, int N, int M, int num_total)
+__global__ void copyIm_TGV_kernel(float *U, float *U_old, int N, int M, int num_total)
{
int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
int index = xIndex + N*yIndex;
- if (index < num_total) {
- Output[index] = Input[index];
+ if (index < num_total) {
+ U_old[index] = U[index];
+ }
+}
+
+__global__ void copyIm_TGV_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, int N, int M, int num_total)
+{
+ int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+ int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+ int index = xIndex + N*yIndex;
+
+ if (index < num_total) {
+ V1_old[index] = V1[index];
+ V2_old[index] = V2[index];
}
}
@@ -220,18 +220,276 @@ __global__ void newU_kernel(float *U, float *U_old, int N, int M, int num_total)
int index = xIndex + N*yIndex;
if (index < num_total) {
- U[index] = 2.0f*U[index] - U_old[index];
+ U[index] = 2.0f*U[index] - U_old[index];
+ }
+}
+
+
+__global__ void newU_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, int N, int M, int num_total)
+{
+ int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+ int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+ int index = xIndex + N*yIndex;
+
+ if (index < num_total) {
+ V1[index] = 2.0f*V1[index] - V1_old[index];
+ V2[index] = 2.0f*V2[index] - V2_old[index];
+ }
+}
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+__global__ void DualP_3D_kernel(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma)
+{
+ int index;
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+
+ index = (dimX*dimY)*k + j*dimX+i;
+ /* symmetric boundary conditions (Neuman) */
+ if (i == dimX-1) P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i-1)] - U[index]) - V1[index]);
+ else P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i+1)] - U[index]) - V1[index]);
+ if (j == dimY-1) P2[index] += sigma*((U[(dimX*dimY)*k + (j-1)*dimX+i] - U[index]) - V2[index]);
+ else P2[index] += sigma*((U[(dimX*dimY)*k + (j+1)*dimX+i] - U[index]) - V2[index]);
+ if (k == dimZ-1) P3[index] += sigma*((U[(dimX*dimY)*(k-1) + j*dimX+i] - U[index]) - V3[index]);
+ else P3[index] += sigma*((U[(dimX*dimY)*(k+1) + j*dimX+i] - U[index]) - V3[index]);
+ }
+ return;
+}
+
+__global__ void ProjP_3D_kernel(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float alpha1)
+{
+ float grad_magn;
+ int index;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+ index = (dimX*dimY)*k + j*dimX+i;
+
+ grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2)))/alpha1;
+ if (grad_magn > 1.0f) {
+ P1[index] /= grad_magn;
+ P2[index] /= grad_magn;
+ P3[index] /= grad_magn;
+ }
+ }
+ return;
+}
+
+__global__ void DualQ_3D_kernel(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float sigma)
+{
+ int index;
+ float q1, q2, q3, q11, q22, q33, q44, q55, q66;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+
+ index = (dimX*dimY)*k + j*dimX+i;
+ q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f;
+ /* symmetric boundary conditions (Neuman) */
+ if (i != dimX-1){
+ q1 = V1[(dimX*dimY)*k + j*dimX+(i+1)] - V1[index];
+ q11 = V2[(dimX*dimY)*k + j*dimX+(i+1)] - V2[index];
+ q33 = V3[(dimX*dimY)*k + j*dimX+(i+1)] - V3[index];
+ }
+ if (j != dimY-1) {
+ q2 = V2[(dimX*dimY)*k + (j+1)*dimX+i] - V2[index];
+ q22 = V1[(dimX*dimY)*k + (j+1)*dimX+i] - V1[index];
+ q55 = V3[(dimX*dimY)*k + (j+1)*dimX+i] - V3[index];
+ }
+ if (k != dimZ-1) {
+ q3 = V3[(dimX*dimY)*(k+1) + j*dimX+i] - V3[index];
+ q44 = V1[(dimX*dimY)*(k+1) + j*dimX+i] - V1[index];
+ q66 = V2[(dimX*dimY)*(k+1) + j*dimX+i] - V2[index];
+ }
+
+ Q1[index] += sigma*(q1); /*Q11*/
+ Q2[index] += sigma*(q2); /*Q22*/
+ Q3[index] += sigma*(q3); /*Q33*/
+ Q4[index] += sigma*(0.5f*(q11 + q22)); /* Q21 / Q12 */
+ Q5[index] += sigma*(0.5f*(q33 + q44)); /* Q31 / Q13 */
+ Q6[index] += sigma*(0.5f*(q55 + q66)); /* Q32 / Q23 */
+ }
+ return;
+}
+
+
+__global__ void ProjQ_3D_kernel(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float alpha0)
+{
+ float grad_magn;
+ int index;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+
+ index = (dimX*dimY)*k + j*dimX+i;
+
+ grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + pow(Q3[index],2) + 2.0f*pow(Q4[index],2) + 2.0f*pow(Q5[index],2) + 2.0f*pow(Q6[index],2));
+ grad_magn = grad_magn/alpha0;
+ if (grad_magn > 1.0f) {
+ Q1[index] /= grad_magn;
+ Q2[index] /= grad_magn;
+ Q3[index] /= grad_magn;
+ Q4[index] /= grad_magn;
+ Q5[index] /= grad_magn;
+ Q6[index] /= grad_magn;
+ }
+ }
+ return;
+}
+__global__ void DivProjP_3D_kernel(float *U, float *U0, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float lambda, float tau)
+{
+ float P_v1, P_v2, P_v3, div;
+ int index;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+
+ index = (dimX*dimY)*k + j*dimX+i;
+
+ if (i == 0) P_v1 = P1[index];
+ else P_v1 = P1[index] - P1[(dimX*dimY)*k + j*dimX+(i-1)];
+ if (j == 0) P_v2 = P2[index];
+ else P_v2 = P2[index] - P2[(dimX*dimY)*k + (j-1)*dimX+i];
+ if (k == 0) P_v3 = P3[index];
+ else P_v3 = P3[index] - P3[(dimX*dimY)*(k-1) + (j)*dimX+i];
+
+ div = P_v1 + P_v2 + P_v3;
+ U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
+ }
+ return;
+}
+__global__ void UpdV_3D_kernel(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float tau)
+{
+ float q1, q4x, q5x, q2, q4y, q6y, q6z, q5z, q3, div1, div2, div3;
+ int index;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+
+ index = (dimX*dimY)*k + j*dimX+i;
+
+ q1 = 0.0f; q4x= 0.0f; q5x= 0.0f; q2= 0.0f; q4y= 0.0f; q6y= 0.0f; q6z= 0.0f; q5z= 0.0f; q3= 0.0f;
+ /* Q1 - Q11, Q2 - Q22, Q3 - Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/
+ /* symmetric boundary conditions (Neuman) */
+ if (i != 0) {
+ q1 = Q1[index] - Q1[(dimX*dimY)*k + j*dimX+(i-1)];
+ q4x = Q4[index] - Q4[(dimX*dimY)*k + j*dimX+(i-1)];
+ q5x = Q5[index] - Q5[(dimX*dimY)*k + j*dimX+(i-1)];
+ }
+ if (j != 0) {
+ q2 = Q2[index] - Q2[(dimX*dimY)*k + (j-1)*dimX+i];
+ q4y = Q4[index] - Q4[(dimX*dimY)*k + (j-1)*dimX+i];
+ q6y = Q6[index] - Q6[(dimX*dimY)*k + (j-1)*dimX+i];
+ }
+ if (k != 0) {
+ q6z = Q6[index] - Q6[(dimX*dimY)*(k-1) + (j)*dimX+i];
+ q5z = Q5[index] - Q5[(dimX*dimY)*(k-1) + (j)*dimX+i];
+ q3 = Q3[index] - Q3[(dimX*dimY)*(k-1) + (j)*dimX+i];
+ }
+ div1 = q1 + q4y + q5z;
+ div2 = q4x + q2 + q6z;
+ div3 = q5x + q6y + q3;
+
+ V1[index] += tau*(P1[index] + div1);
+ V2[index] += tau*(P2[index] + div2);
+ V3[index] += tau*(P3[index] + div3);
+ }
+ return;
+}
+
+__global__ void copyIm_TGV_kernel3D(float *U, float *U_old, int dimX, int dimY, int dimZ, int num_total)
+{
+ int index;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ index = (dimX*dimY)*k + j*dimX+i;
+
+ if (index < num_total) {
+ U_old[index] = U[index];
}
}
+__global__ void copyIm_TGV_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, int dimX, int dimY, int dimZ, int num_total)
+{
+ int index;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ index = (dimX*dimY)*k + j*dimX+i;
+
+ if (index < num_total) {
+ V1_old[index] = V1[index];
+ V2_old[index] = V2[index];
+ V3_old[index] = V3[index];
+ }
+}
+
+__global__ void newU_kernel3D(float *U, float *U_old, int dimX, int dimY, int dimZ, int num_total)
+{
+ int index;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ index = (dimX*dimY)*k + j*dimX+i;
+
+ if (index < num_total) {
+ U[index] = 2.0f*U[index] - U_old[index];
+ }
+}
+
+__global__ void newU_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, int dimX, int dimY, int dimZ, int num_total)
+{
+ int index;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ index = (dimX*dimY)*k + j*dimX+i;
+
+ if (index < num_total) {
+ V1[index] = 2.0f*V1[index] - V1_old[index];
+ V2[index] = 2.0f*V2[index] - V2_old[index];
+ V3[index] = 2.0f*V3[index] - V3_old[index];
+ }
+}
+
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-/********************* MAIN HOST FUNCTION ******************/
+/************************ MAIN HOST FUNCTION ***********************/
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY)
+extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ)
{
- int dimTotal, dev = 0;
- CHECK(cudaSetDevice(dev));
- dimTotal = dimX*dimY;
+ int dimTotal, dev = 0;
+ CHECK(cudaSetDevice(dev));
+
+ dimTotal = dimX*dimY*dimZ;
float *U_old, *d_U0, *d_U, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma;
tau = pow(L2,-0.5);
@@ -254,16 +512,17 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo
CHECK(cudaMemcpy(d_U0,U0,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
CHECK(cudaMemcpy(d_U,U0,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
- /*2D case */
+ if (dimZ == 1) {
+ /*2D case */
dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
for(int n=0; n < iterationsNumb; n++) {
- /* Calculate Dual Variable P */
+ /* Calculate Dual Variable P */
DualP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, P1, P2, dimX, dimY, sigma);
- CHECK(cudaDeviceSynchronize());
- /*Projection onto convex set for P*/
+ CHECK(cudaDeviceSynchronize());
+ /*Projection onto convex set for P*/
ProjP_2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, alpha1);
CHECK(cudaDeviceSynchronize());
/* Calculate Dual Variable Q */
@@ -282,18 +541,72 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo
newU_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimTotal);
CHECK(cudaDeviceSynchronize());
/*saving V into V_old*/
- copyIm_TGV_kernel<<<dimGrid,dimBlock>>>(V1, V1_old, dimX, dimY, dimTotal);
- copyIm_TGV_kernel<<<dimGrid,dimBlock>>>(V2, V2_old, dimX, dimY, dimTotal);
+ copyIm_TGV_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, dimX, dimY, dimTotal);
CHECK(cudaDeviceSynchronize());
/* upd V*/
UpdV_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, P1, P2, Q1, Q2, Q3, dimX, dimY, tau);
CHECK(cudaDeviceSynchronize());
/*get new V*/
- newU_kernel<<<dimGrid,dimBlock>>>(V1, V1_old, dimX, dimY, dimTotal);
- newU_kernel<<<dimGrid,dimBlock>>>(V2, V2_old, dimX, dimY, dimTotal);
+ newU_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, dimX, dimY, dimTotal);
CHECK(cudaDeviceSynchronize());
+ }
}
-
+ else {
+ /*3D case */
+ dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+ dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKXSIZE));
+
+ float *P3, *Q4, *Q5, *Q6, *V3, *V3_old;
+
+ CHECK(cudaMalloc((void**)&P3,dimTotal*sizeof(float)));
+ CHECK(cudaMalloc((void**)&Q4,dimTotal*sizeof(float)));
+ CHECK(cudaMalloc((void**)&Q5,dimTotal*sizeof(float)));
+ CHECK(cudaMalloc((void**)&Q6,dimTotal*sizeof(float)));
+ CHECK(cudaMalloc((void**)&V3,dimTotal*sizeof(float)));
+ CHECK(cudaMalloc((void**)&V3_old,dimTotal*sizeof(float)));
+
+ for(int n=0; n < iterationsNumb; n++) {
+
+ /* Calculate Dual Variable P */
+ DualP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, V3, P1, P2, P3, dimX, dimY, dimZ, sigma);
+ CHECK(cudaDeviceSynchronize());
+ /*Projection onto convex set for P*/
+ ProjP_3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, alpha1);
+ CHECK(cudaDeviceSynchronize());
+ /* Calculate Dual Variable Q */
+ DualQ_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, sigma);
+ CHECK(cudaDeviceSynchronize());
+ /*Projection onto convex set for Q*/
+ ProjQ_3D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, alpha0);
+ CHECK(cudaDeviceSynchronize());
+ /*saving U into U_old*/
+ copyIm_TGV_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimZ, dimTotal);
+ CHECK(cudaDeviceSynchronize());
+ /*adjoint operation -> divergence and projection of P*/
+ DivProjP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, P3, dimX, dimY, dimZ, lambda, tau);
+ CHECK(cudaDeviceSynchronize());
+ /*get updated solution U*/
+ newU_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimZ, dimTotal);
+ CHECK(cudaDeviceSynchronize());
+ /*saving V into V_old*/
+ copyIm_TGV_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, dimX, dimY, dimZ, dimTotal);
+ CHECK(cudaDeviceSynchronize());
+ /* upd V*/
+ UpdV_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, tau);
+ CHECK(cudaDeviceSynchronize());
+ /*get new V*/
+ newU_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, dimX, dimY, dimZ, dimTotal);
+ CHECK(cudaDeviceSynchronize());
+ }
+
+ CHECK(cudaFree(Q4));
+ CHECK(cudaFree(Q5));
+ CHECK(cudaFree(Q6));
+ CHECK(cudaFree(P3));
+ CHECK(cudaFree(V3));
+ CHECK(cudaFree(V3_old));
+ }
+
CHECK(cudaMemcpy(U,d_U,dimTotal*sizeof(float),cudaMemcpyDeviceToHost));
CHECK(cudaFree(d_U0));
CHECK(cudaFree(d_U));
diff --git a/Core/regularisers_GPU/TGV_GPU_core.h b/Core/regularisers_GPU/TGV_GPU_core.h
index 5a4eb76..9f73d1c 100644
--- a/Core/regularisers_GPU/TGV_GPU_core.h
+++ b/Core/regularisers_GPU/TGV_GPU_core.h
@@ -3,6 +3,6 @@
#include "CCPiDefines.h"
#include <stdio.h>
-extern "C" CCPI_EXPORT int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY);
+extern "C" CCPI_EXPORT int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ);
#endif
diff --git a/Readme.md b/Readme.md
index 1db50aa..ebd4d20 100644
--- a/Readme.md
+++ b/Readme.md
@@ -33,7 +33,7 @@
1. Rudin-Osher-Fatemi (ROF) Total Variation (explicit PDE minimisation scheme) **2D/3D CPU/GPU** (Ref. *1*)
2. Fast-Gradient-Projection (FGP) Total Variation **2D/3D CPU/GPU** (Ref. *2*)
3. Split-Bregman (SB) Total Variation **2D/3D CPU/GPU** (Ref. *5*)
-4. Total Generalised Variation (TGV) model for higher-order regularisation **2D CPU/GPU** (Ref. *6*)
+4. Total Generalised Variation (TGV) model for higher-order regularisation **2D/3D CPU/GPU** (Ref. *6*)
5. Linear and nonlinear diffusion (explicit PDE minimisation scheme) **2D/3D CPU/GPU** (Ref. *8*)
6. Anisotropic Fourth-Order Diffusion (explicit PDE minimisation) **2D/3D CPU/GPU** (Ref. *9*)
7. A joint ROF-LLT (Lysaker-Lundervold-Tai) model for higher-order regularisation **2D/3D CPU/GPU** (Ref. *10,11*)
@@ -93,7 +93,7 @@ conda install ccpi-regulariser -c ccpi -c conda-forge
#### Python (conda-build)
```
- export CIL_VERSION=0.10.3
+ export CIL_VERSION=0.10.4
conda build Wrappers/Python/conda-recipe --numpy 1.12 --python 3.5
conda install ccpi-regulariser=${CIL_VERSION} --use-local --force
cd demos/
diff --git a/Wrappers/Python/conda-recipe/run_test.py b/Wrappers/Python/conda-recipe/run_test.py
index 37b9dcc..21f3216 100755
--- a/Wrappers/Python/conda-recipe/run_test.py
+++ b/Wrappers/Python/conda-recipe/run_test.py
@@ -303,7 +303,7 @@ class TestRegularisers(unittest.TestCase):
'input' : u0,\
'regularisation_parameter':0.04, \
'alpha1':1.0,\
- 'alpha0':0.7,\
+ 'alpha0':2.0,\
'number_of_iterations' :250 ,\
'LipshitzConstant' :12 ,\
}
diff --git a/Wrappers/Python/demos/demo_cpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_regularisers.py
index 859b633..e6befa9 100644
--- a/Wrappers/Python/demos/demo_cpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_cpu_regularisers.py
@@ -225,8 +225,8 @@ pars = {'algorithm' : TGV, \
'input' : u0,\
'regularisation_parameter':0.04, \
'alpha1':1.0,\
- 'alpha0':0.7,\
- 'number_of_iterations' :250 ,\
+ 'alpha0':2.0,\
+ 'number_of_iterations' :1350 ,\
'LipshitzConstant' :12 ,\
}
diff --git a/Wrappers/Python/demos/demo_cpu_regularisers3D.py b/Wrappers/Python/demos/demo_cpu_regularisers3D.py
index c42c37b..2d2fc22 100644
--- a/Wrappers/Python/demos/demo_cpu_regularisers3D.py
+++ b/Wrappers/Python/demos/demo_cpu_regularisers3D.py
@@ -12,7 +12,7 @@ import matplotlib.pyplot as plt
import numpy as np
import os
import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, LLT_ROF, FGP_dTV, NDF, Diff4th
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
from qualitymetrics import rmse
###############################################################################
def printParametersToString(pars):
@@ -68,7 +68,7 @@ Im2[:,0:M] = Im[:,0:M]
Im = Im2
del Im2
"""
-slices = 20
+slices = 15
noisyVol = np.zeros((slices,N,M),dtype='float32')
noisyRef = np.zeros((slices,N,M),dtype='float32')
@@ -96,7 +96,7 @@ pars = {'algorithm': ROF_TV, \
'input' : noisyVol,\
'regularisation_parameter':0.04,\
'number_of_iterations': 500,\
- 'time_marching_parameter': 0.0025
+ 'time_marching_parameter': 0.0025
}
print ("#############ROF TV CPU####################")
start_time = timeit.default_timer()
@@ -264,6 +264,54 @@ plt.title('{}'.format('Recovered volume on the CPU using LLT-ROF'))
#%%
print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________TGV (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot
+fig = plt.figure()
+plt.suptitle('Performance of TGV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : TGV, \
+ 'input' : noisyVol,\
+ 'regularisation_parameter':0.04, \
+ 'alpha1':1.0,\
+ 'alpha0':2.0,\
+ 'number_of_iterations' :250 ,\
+ 'LipshitzConstant' :12 ,\
+ }
+
+print ("#############TGV CPU####################")
+start_time = timeit.default_timer()
+tgv_cpu3D = TGV(pars['input'],
+ pars['regularisation_parameter'],
+ pars['alpha1'],
+ pars['alpha0'],
+ pars['number_of_iterations'],
+ pars['LipshitzConstant'],'cpu')
+
+
+rms = rmse(idealVol, tgv_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+ verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using TGV'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
print ("________________NDF (3D)___________________")
print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
diff --git a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
index 275e844..230a761 100644
--- a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
@@ -323,8 +323,8 @@ pars = {'algorithm' : TGV, \
'input' : u0,\
'regularisation_parameter':0.04, \
'alpha1':1.0,\
- 'alpha0':0.7,\
- 'number_of_iterations' :250 ,\
+ 'alpha0':2.0,\
+ 'number_of_iterations' :400 ,\
'LipshitzConstant' :12 ,\
}
diff --git a/Wrappers/Python/demos/demo_gpu_regularisers.py b/Wrappers/Python/demos/demo_gpu_regularisers.py
index 9115494..e1c6575 100644
--- a/Wrappers/Python/demos/demo_gpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_gpu_regularisers.py
@@ -223,8 +223,8 @@ pars = {'algorithm' : TGV, \
'input' : u0,\
'regularisation_parameter':0.04, \
'alpha1':1.0,\
- 'alpha0':0.7,\
- 'number_of_iterations' :250 ,\
+ 'alpha0':2.0,\
+ 'number_of_iterations' :1250 ,\
'LipshitzConstant' :12 ,\
}
diff --git a/Wrappers/Python/demos/demo_gpu_regularisers3D.py b/Wrappers/Python/demos/demo_gpu_regularisers3D.py
index cda2847..b6058d2 100644
--- a/Wrappers/Python/demos/demo_gpu_regularisers3D.py
+++ b/Wrappers/Python/demos/demo_gpu_regularisers3D.py
@@ -12,7 +12,7 @@ import matplotlib.pyplot as plt
import numpy as np
import os
import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, LLT_ROF, FGP_dTV, NDF, Diff4th
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
from qualitymetrics import rmse
###############################################################################
def printParametersToString(pars):
@@ -67,7 +67,7 @@ Im = Im2
del Im2
"""
-#%%
+
slices = 20
filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
@@ -268,6 +268,53 @@ plt.title('{}'.format('Recovered volume on the GPU using LLT-ROF'))
#%%
print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________TGV (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot
+fig = plt.figure()
+plt.suptitle('Performance of TGV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : TGV, \
+ 'input' : noisyVol,\
+ 'regularisation_parameter':0.04, \
+ 'alpha1':1.0,\
+ 'alpha0':2.0,\
+ 'number_of_iterations' :600 ,\
+ 'LipshitzConstant' :12 ,\
+ }
+
+print ("#############TGV GPU####################")
+start_time = timeit.default_timer()
+tgv_gpu3D = TGV(pars['input'],
+ pars['regularisation_parameter'],
+ pars['alpha1'],
+ pars['alpha0'],
+ pars['number_of_iterations'],
+ pars['LipshitzConstant'],'gpu')
+
+
+rms = rmse(idealVol, tgv_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+ verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using TGV'))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
print ("_______________NDF-TV (3D)_________________")
print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
diff --git a/Wrappers/Python/src/cpu_regularisers.pyx b/Wrappers/Python/src/cpu_regularisers.pyx
index 7d57ed1..11a0617 100644
--- a/Wrappers/Python/src/cpu_regularisers.pyx
+++ b/Wrappers/Python/src/cpu_regularisers.pyx
@@ -22,7 +22,7 @@ cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar,
cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
cdef extern float SB_TV_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
cdef extern float LLT_ROF_CPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
-cdef extern float TGV_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY);
+cdef extern float TGV_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ);
cdef extern float Diffusion_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ);
cdef extern float Diffus4th_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
cdef extern float TNV_CPU_main(float *Input, float *u, float lambdaPar, int maxIter, float tol, int dimX, int dimY, int dimZ);
@@ -202,12 +202,8 @@ def TGV_CPU(inputData, regularisation_parameter, alpha1, alpha0, iterations, Lip
return TGV_2D(inputData, regularisation_parameter, alpha1, alpha0,
iterations, LipshitzConst)
elif inputData.ndim == 3:
- shape = inputData.shape
- out = inputData.copy()
- for i in range(shape[0]):
- out[i,:,:] = TGV_2D(inputData[i,:,:], regularisation_parameter,
- alpha1, alpha0, iterations, LipshitzConst)
- return out
+ return TGV_3D(inputData, regularisation_parameter, alpha1, alpha0,
+ iterations, LipshitzConst)
def TGV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
float regularisation_parameter,
@@ -229,7 +225,30 @@ def TGV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
alpha0,
iterationsNumb,
LipshitzConst,
- dims[1],dims[0])
+ dims[1],dims[0],1)
+ return outputData
+def TGV_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+ float regularisation_parameter,
+ float alpha1,
+ float alpha0,
+ int iterationsNumb,
+ float LipshitzConst):
+
+ cdef long dims[3]
+ dims[0] = inputData.shape[0]
+ dims[1] = inputData.shape[1]
+ dims[2] = inputData.shape[2]
+
+ cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+ np.zeros([dims[0], dims[1], dims[2]], dtype='float32')
+
+ #/* Run TGV iterations for 3D data */
+ TGV_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter,
+ alpha1,
+ alpha0,
+ iterationsNumb,
+ LipshitzConst,
+ dims[2], dims[1], dims[0])
return outputData
#***************************************************************#
diff --git a/Wrappers/Python/src/gpu_regularisers.pyx b/Wrappers/Python/src/gpu_regularisers.pyx
index 47a6149..b52f669 100644
--- a/Wrappers/Python/src/gpu_regularisers.pyx
+++ b/Wrappers/Python/src/gpu_regularisers.pyx
@@ -23,7 +23,7 @@ CUDAErrorMessage = 'CUDA error'
cdef extern int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
cdef extern int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z);
cdef extern int TV_SB_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int printM, int N, int M, int Z);
-cdef extern int TGV_GPU_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY);
+cdef extern int TGV_GPU_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ);
cdef extern int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);
cdef extern int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
cdef extern int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z);
@@ -102,12 +102,7 @@ def TGV_GPU(inputData, regularisation_parameter, alpha1, alpha0, iterations, Lip
if inputData.ndim == 2:
return TGV2D(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst)
elif inputData.ndim == 3:
- shape = inputData.shape
- out = inputData.copy()
- for i in range(shape[0]):
- out[i,:,:] = TGV2D(inputData[i,:,:], regularisation_parameter,
- alpha1, alpha0, iterations, LipshitzConst)
- return out
+ return TGV3D(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst)
# Directional Total-variation Fast-Gradient-Projection (FGP)
def dTV_FGP_GPU(inputData,
refdata,
@@ -393,7 +388,6 @@ def LLT_ROF_GPU3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
raise ValueError(CUDAErrorMessage);
-
#***************************************************************#
#***************** Total Generalised Variation *****************#
#***************************************************************#
@@ -417,11 +411,38 @@ def TGV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
alpha0,
iterationsNumb,
LipshitzConst,
- dims[1],dims[0])==0):
+ dims[1],dims[0], 1)==0):
return outputData
else:
raise ValueError(CUDAErrorMessage);
+def TGV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+ float regularisation_parameter,
+ float alpha1,
+ float alpha0,
+ int iterationsNumb,
+ float LipshitzConst):
+
+ cdef long dims[3]
+ dims[0] = inputData.shape[0]
+ dims[1] = inputData.shape[1]
+ dims[2] = inputData.shape[2]
+
+ cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+ np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+
+ # Running CUDA code here
+ if (TGV_GPU_main(
+ &inputData[0,0,0], &outputData[0,0,0], regularisation_parameter,
+ alpha1,
+ alpha0,
+ iterationsNumb,
+ LipshitzConst,
+ dims[2], dims[1], dims[0])==0):
+ return outputData;
+ else:
+ raise ValueError(CUDAErrorMessage);
+
#****************************************************************#
#**************Directional Total-variation FGP ******************#