10 files changed, 728 insertions, 61 deletions
diff --git a/demos/Demo1.m b/demos/Demo1.m
index 3d57795..15e2e5b 100644
--- a/demos/Demo1.m
+++ b/demos/Demo1.m
@@ -15,7 +15,7 @@
 close all;clc;clear all;
 % adding paths
 addpath('../data/'); 
-addpath('../main_func/'); 
+addpath('../main_func/'); addpath('../main_func/regularizers_CPU/');
 addpath('../supp/'); 
 
 load phantom_bone512.mat % load the phantom
diff --git a/demos/DemoRD1.m b/demos/DemoRD1.m
index e190d34..5bb5f6b 100644
--- a/demos/DemoRD1.m
+++ b/demos/DemoRD1.m
@@ -5,7 +5,7 @@ close all
 
 % adding paths
 addpath('../data/'); 
-addpath('../main_func/'); 
+addpath('../main_func/'); addpath('../main_func/regularizers_CPU/');
 addpath('../supp/'); 
 
 load('sino_basalt.mat') % load real neutron data
diff --git a/demos/DemoRD2.m b/demos/DemoRD2.m
index 0f829a8..0db3595 100644
--- a/demos/DemoRD2.m
+++ b/demos/DemoRD2.m
@@ -1,12 +1,12 @@
 % Demonstration of tomographic 3D reconstruction from X-ray synchrotron
 % dataset (dendrites) using various data fidelities
-% warning: can take up to 15-20 minutes to run for the whole 3D data
+% ! can take up to 15-20 minutes to run for the whole 3D data !
 clear all
 close all
 %%
 % % adding paths
 addpath('../data/');
-addpath('../main_func/');
+addpath('../main_func/'); addpath('../main_func/regularizers_CPU/');
 addpath('../supp/');
 
 load('DendrRawData.mat') % load raw data of 3D dendritic set
@@ -30,7 +30,7 @@ Weights3D = single(data_raw3D); % weights for PW model
 clear data_raw3D
 %%
 % set projection/reconstruction geometry here
-Z_slices = 20;
+Z_slices = 3;
 det_row_count = Z_slices;
 proj_geom = astra_create_proj_geom('parallel3d', 1, 1, det_row_count, size_det, angles_rad);
 vol_geom = astra_create_vol_geom(recon_size,recon_size,Z_slices);
@@ -44,14 +44,13 @@ clear params
 params.proj_geom = proj_geom; % pass geometry to the function
 params.vol_geom = vol_geom;
 params.sino = Sino3D;
-params.L_const = 7.6789e+08; % found quickly for one slice first
-params.iterFISTA  = 30;
+params.iterFISTA  = 35;
 params.weights = Weights3D;
 params.show = 1;
-params.maxvalplot = 2.5; params.slice = 4;
+params.maxvalplot = 2.5; params.slice = 2;
 
 tic; [X_fista, output] = FISTA_REC(params); toc;
-figure; imshow(X_fista(:,:,1) , [0, 2.5]); title ('FISTA-PWLS reconstruction');
+figure; imshow(X_fista(:,:,params.slice) , [0, 2.5]); title ('FISTA-PWLS reconstruction');
 %%
 fprintf('%s\n', 'Reconstruction using FISTA-PWLS-TV...');
 clear params
@@ -59,14 +58,13 @@ params.proj_geom = proj_geom; % pass geometry to the function
 params.vol_geom = vol_geom;
 params.sino = Sino3D;
 params.iterFISTA  = 40;
-params.L_const = 7.6789e+08; 
 params.Regul_Lambda_FGPTV = 0.005; % TV regularization parameter for FGP-TV
 params.weights = Weights3D;
 params.show = 1;
-params.maxvalplot = 2.5; params.slice = 4;
+params.maxvalplot = 2.5; params.slice = 2;
 
 tic; [X_fista_TV] = FISTA_REC(params); toc;
-figure; imshow(X_fista_TV(:,:,1) , [0, 2.5]); title ('FISTA-PWLS-TV reconstruction');
+figure; imshow(X_fista_TV(:,:,params.slice) , [0, 2.5]); title ('FISTA-PWLS-TV reconstruction');
 %%
 %%
 fprintf('%s\n', 'Reconstruction using FISTA-GH-TV...');
@@ -75,16 +73,15 @@ params.proj_geom = proj_geom; % pass geometry to the function
 params.vol_geom = vol_geom;
 params.sino = Sino3D;
 params.iterFISTA  = 40;
-params.L_const = 7.6789e+08; 
 params.Regul_Lambda_FGPTV = 0.005; % TV regularization parameter for FGP-TV
 params.Ring_LambdaR_L1 = 0.002; % Soft-Thresh L1 ring variable parameter
 params.Ring_Alpha = 21; % to boost ring removal procedure
 params.weights = Weights3D;
 params.show = 1;
-params.maxvalplot = 2.5; params.slice = 4;
+params.maxvalplot = 2.5; params.slice = 2;
 
 tic; [X_fista_GH_TV] = FISTA_REC(params); toc;
-figure; imshow(X_fista_GH_TV(:,:,1) , [0, 2.5]); title ('FISTA-GH-TV reconstruction');
+figure; imshow(X_fista_GH_TV(:,:,params.slice) , [0, 2.5]); title ('FISTA-GH-TV reconstruction');
 %%
 %%
 fprintf('%s\n', 'Reconstruction using FISTA-GH-TV-LLT...');
@@ -92,7 +89,7 @@ clear params
 params.proj_geom = proj_geom; % pass geometry to the function
 params.vol_geom = vol_geom;
 params.sino = Sino3D;
-params.iterFISTA  = 40;
+params.iterFISTA  = 5;
 params.Regul_Lambda_FGPTV = 0.005; % TV regularization parameter for FGP-TV
 params.Regul_LambdaHO = 250;  % regularization parameter for LLT problem
 params.Regul_tauLLT = 0.0005; % time-step parameter for the explicit scheme
@@ -100,10 +97,10 @@ params.Ring_LambdaR_L1 = 0.002; % Soft-Thresh L1 ring variable parameter
 params.Ring_Alpha = 21; % to boost ring removal procedure
 params.weights = Weights3D;
 params.show = 1;
-params.maxvalplot = 2.5; params.slice = 10;
+params.maxvalplot = 2.5; params.slice = 2;
 
 tic; [X_fista_GH_TVLLT] = FISTA_REC(params); toc;
-figure; imshow(X_fista_GH_TVLLT(:,:,1) , [0, 2.5]); title ('FISTA-GH-TV-LLT reconstruction');
+figure; imshow(X_fista_GH_TVLLT(:,:,params.slice) , [0, 2.5]); title ('FISTA-GH-TV-LLT reconstruction');
 %%
 %%
 % fprintf('%s\n', 'Reconstruction using FISTA-Student-TV...');
diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m
index db2b581..2823691 100644
--- a/main_func/FISTA_REC.m
+++ b/main_func/FISTA_REC.m
@@ -15,7 +15,6 @@ function [X,  output] = FISTA_REC(params)
 %----------------Regularization choices------------------------
 %       - .Regul_Lambda_FGPTV (FGP-TV regularization parameter)
 %       - .Regul_Lambda_SBTV (SplitBregman-TV regularization parameter)
-%       - .Regul_Lambda_L1 (L1 regularization by soft-thresholding)
 %       - .Regul_Lambda_TVLLT (Higher order SB-LLT regularization parameter)
 %       - .Regul_tol (tolerance to terminate regul iterations, default 1.0e-04)
 %       - .Regul_Iterations (iterations for the selected penalty, default 25)
@@ -28,7 +27,7 @@ function [X,  output] = FISTA_REC(params)
 %       - .slice (for 3D volumes - slice number to imshow)
 % ___Output___:
 % 1. X - reconstructed image/volume
-% 2. output - structure with
+% 2. output - a structure with
 %    - .Resid_error - residual error (if X_ideal is given)
 %    - .objective: value of the objective function
 %    - .L_const: Lipshitz constant to avoid recalculations
@@ -74,21 +73,50 @@ if (isfield(params,'L_const'))
     L_const = params.L_const;
 else
     % using Power method (PM) to establish L constant
-    niter = 8; % number of iteration for PM
-    x = rand(N,N,SlicesZ);
-    sqweight = sqrt(weights);
-    [sino_id, y] = astra_create_sino3d_cuda(x, proj_geom, vol_geom);
-    y = sqweight.*y;
-    astra_mex_data3d('delete', sino_id);
-    
-    for i = 1:niter
-        [id,x] = astra_create_backprojection3d_cuda(sqweight.*y, proj_geom, vol_geom);
-        s = norm(x(:));
-        x = x/s;
-        [sino_id, y] = astra_create_sino3d_cuda(x, proj_geom, vol_geom);
+    if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'parallel3d'))
+        % for parallel geometry we can do just one slice
+        fprintf('%s \n', 'Calculating Lipshitz constant for parallel beam geometry...');
+        niter = 15; % number of iteration for the PM
+        x1 = rand(N,N,1);
+        sqweight = sqrt(weights(:,:,1));
+        proj_geomT = proj_geom;
+        proj_geomT.DetectorRowCount = 1;
+        vol_geomT = vol_geom;
+        vol_geomT.GridSliceCount = 1;
+        [sino_id, y] = astra_create_sino3d_cuda(x1, proj_geomT, vol_geomT);
         y = sqweight.*y;
         astra_mex_data3d('delete', sino_id);
-        astra_mex_data3d('delete', id);
+        
+        for i = 1:niter
+            [id,x1] = astra_create_backprojection3d_cuda(sqweight.*y, proj_geomT, vol_geomT);
+            s = norm(x1(:));
+            x1 = x1/s;
+            [sino_id, y] = astra_create_sino3d_cuda(x1, proj_geomT, vol_geomT);
+            y = sqweight.*y;
+            astra_mex_data3d('delete', sino_id);
+            astra_mex_data3d('delete', id);
+        end
+        clear proj_geomT vol_geomT
+    else
+        % divergen beam geometry
+        fprintf('%s \n', 'Calculating Lipshitz constant for divergen beam geometry...');
+        niter = 8; % number of iteration for PM
+        x1 = rand(N,N,SlicesZ);
+        sqweight = sqrt(weights);
+        [sino_id, y] = astra_create_sino3d_cuda(x1, proj_geom, vol_geom);
+        y = sqweight.*y;
+        astra_mex_data3d('delete', sino_id);
+        
+        for i = 1:niter
+            [id,x1] = astra_create_backprojection3d_cuda(sqweight.*y, proj_geom, vol_geom);
+            s = norm(x1(:));
+            x1 = x1/s;
+            [sino_id, y] = astra_create_sino3d_cuda(x1, proj_geom, vol_geom);
+            y = sqweight.*y;
+            astra_mex_data3d('delete', sino_id);
+            astra_mex_data3d('delete', id);
+        end
+        clear x1
     end
     L_const = s;
 end
@@ -112,11 +140,6 @@ if (isfield(params,'Regul_Lambda_SBTV'))
 else
     lambdaSB_TV = 0;
 end
-if (isfield(params,'Regul_Lambda_L1'))
-    lambdaL1 = params.Regul_Lambda_L1;
-else
-    lambdaL1 = 0;
-end
 if (isfield(params,'Regul_tol'))
     tol = params.Regul_tol;
 else
@@ -194,19 +217,18 @@ else
     X = zeros(N,N,SlicesZ, 'single'); % storage for the solution
 end
 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-Resid_error = zeros(iterFISTA,1); % error vector
-objective = zeros(iterFISTA,1); % obhective vector
+%----------------Reconstruction part------------------------
+Resid_error = zeros(iterFISTA,1); % errors vector (if the ground truth is given)
+objective = zeros(iterFISTA,1); % objective function values vector
 
 t = 1;
 X_t = X;
 
-% add_ring = zeros(size(sino),'single'); % size of sinogram array
 r = zeros(Detectors,SlicesZ, 'single'); % 2D array (for 3D data) of sparse "ring" vectors
 r_x = r; % another ring variable
 residual = zeros(size(sino),'single');
 
-% Outer iterations loop
+% Outer FISTA iterations loop
 for i = 1:iterFISTA
     
     X_old = X;
@@ -216,12 +238,10 @@ for i = 1:iterFISTA
     [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom);
     
     if (lambdaR_L1 > 0)
-        % add ring removal part (Group-Huber fidelity)
+        % ring removal part (Group-Huber fidelity)
         for kkk = 1:anglesNumb
-            % add_ring(:,kkk,:) =  squeeze(sino(:,kkk,:)) - alpha_ring.*r_x;
             residual(:,kkk,:) =  squeeze(weights(:,kkk,:)).*(squeeze(sino_updt(:,kkk,:)) - (squeeze(sino(:,kkk,:)) - alpha_ring.*r_x));
-        end
-        
+        end        
         vec = sum(residual,2);
         if (SlicesZ > 1)
             vec = squeeze(vec(:,1,:));
@@ -229,9 +249,10 @@ for i = 1:iterFISTA
         r = r_x - (1./L_const).*vec;
     else
         % no ring removal
-        residual = weights.*(sino_updt - sino);
+        residual = weights.*(sino_updt - sino);        
     end
-    % residual =  weights.*(sino_updt - add_ring);
+    
+    objective(i) = (0.5*norm(residual(:))^2)/(Detectors*anglesNumb*SlicesZ); % for the objective function output
     
     [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geom, vol_geom);
     
@@ -242,27 +263,22 @@ for i = 1:iterFISTA
     if (lambdaFGP_TV > 0)
         % FGP-TV regularization
         [X, f_val] = FGP_TV(single(X), lambdaFGP_TV, IterationsRegul, tol, 'iso');
-        objective(i) = 0.5.*norm(residual(:))^2 + f_val;
+        objective(i) = objective(i) + f_val;
     end
     if (lambdaSB_TV > 0)
         % Split Bregman regularization
         X = SplitBregman_TV(single(X), lambdaSB_TV, IterationsRegul, tol);  % (more memory efficent)
-        objective(i) = 0.5.*norm(residual(:))^2;
-    end
-    if (lambdaL1 > 0)
-        % L1 soft-threhsolding regularization
-        X = max(abs(X)-lambdaL1, 0).*sign(X);
-        objective(i) = 0.5.*norm(residual(:))^2;
     end
     if (lambdaHO > 0)
         % Higher Order (LLT) regularization
         X2 = LLT_model(single(X), lambdaHO, tauHO, iterHO, 3.0e-05, 0);
         X = 0.5.*(X + X2); % averaged combination of two solutions
-        objective(i) = 0.5.*norm(residual(:))^2;
     end
     
+    
+    
     if (lambdaR_L1 > 0)
-        r =  max(abs(r)-lambdaR_L1, 0).*sign(r); % soft-thresholding operator
+        r =  max(abs(r)-lambdaR_L1, 0).*sign(r); % soft-thresholding operator for ring vector
     end
     
     t = (1 + sqrt(1 + 4*t^2))/2; % updating t
@@ -281,9 +297,9 @@ for i = 1:iterFISTA
     end
     if (strcmp(X_ideal, 'none' ) == 0)
         Resid_error(i) = RMSE(X(ROI), X_ideal(ROI));
-        fprintf('%s %i %s %s %.4f  %s %s %.4f \n', 'Iteration Number:', i, '|', 'Error RMSE:', Resid_error(i), '|', 'Objective:', objective(i));
+        fprintf('%s %i %s %s %.4f  %s %s %f \n', 'Iteration Number:', i, '|', 'Error RMSE:', Resid_error(i), '|', 'Objective:', objective(i));
     else
-        fprintf('%s %i  %s %s %.4f \n', 'Iteration Number:', i, '|', 'Objective:', objective(i));
+        fprintf('%s %i  %s %s %f \n', 'Iteration Number:', i, '|', 'Objective:', objective(i));
     end
 end
 output.Resid_error = Resid_error;
diff --git a/main_func/compile_mex.m b/main_func/compile_mex.m
index 4d97bc2..7bfa8eb 100644
--- a/main_func/compile_mex.m
+++ b/main_func/compile_mex.m
@@ -1,4 +1,10 @@
-% compile mex's
+% compile mex's once
+cd regularizers_CPU/
+
 mex LLT_model.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+
+cd ../../
+cd demos
diff --git a/main_func/FGP_TV.c b/main_func/regularizers_CPU/FGP_TV.c
index 1a1fd13..1a1fd13 100644
--- a/main_func/FGP_TV.c
+++ b/main_func/regularizers_CPU/FGP_TV.c
diff --git a/main_func/LLT_model.c b/main_func/regularizers_CPU/LLT_model.c
index 0aed31e..0aed31e 100644
--- a/main_func/LLT_model.c
+++ b/main_func/regularizers_CPU/LLT_model.c
diff --git a/main_func/regularizers_CPU/PatchBased_Regul.c b/main_func/regularizers_CPU/PatchBased_Regul.c
new file mode 100644
index 0000000..1ed29d4
--- /dev/null
+++ b/main_func/regularizers_CPU/PatchBased_Regul.c
@@ -0,0 +1,295 @@
+#define _USE_MATH_DEFINES
+
+#include "mex.h"
+#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+
+/* C-OMP implementation of  patch-based (PB) regularization (2D and 3D cases). 
+ * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
+ * 
+ * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
+ *             2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
+ *
+ * Input Parameters (mandatory):
+ * 1. Image (2D or 3D)
+ * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)
+ * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)
+ * 4. h - parameter for the PB penalty function
+ * 5. lambda - regularization parameter 
+
+ * Output:
+ * 1. regularized (denoised) Image (N x N)/volume (N x N x N)
+ *
+ * Quick 2D denoising example in Matlab:   
+   Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+   u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
+   ImDen = PB_Regul_CPU(single(u0), 3, 1, 0.08, 0.05); 
+ *
+ * Please see more tests in a file: 
+   TestTemporalSmoothing.m
+ 
+ *
+ * Matlab + C/mex compilers needed
+ * to compile with OMP support: mex PB_Regul_CPU.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp"
+ *
+ * D. Kazantsev *
+ * 02/07/2014
+ * Harwell, UK
+ */
+
+float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY,  int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop);
+float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda);
+float PB_FUNC3D(float *A, float *B, int dimX, int dimY, int dimZ, int padXY, int SearchW, int SimilW, float h, float lambda);      
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[]) 
+{    
+    int N, M, Z, numdims, SearchW, SimilW, SearchW_real, padXY, newsizeX, newsizeY, newsizeZ, switchpad_crop;
+    const int  *dims;
+    float *A, *B=NULL, *Ap=NULL, *Bp=NULL, h, lambda;
+    
+    numdims = mxGetNumberOfDimensions(prhs[0]);
+    dims = mxGetDimensions(prhs[0]);
+    
+    N = dims[0];
+    M = dims[1];
+    Z = dims[2];
+    
+    if ((numdims < 2) || (numdims > 3)) {mexErrMsgTxt("The input should be 2D image or 3D volume");}
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
+    
+    if(nrhs != 5) mexErrMsgTxt("Five inputs reqired: Image(2D,3D), SearchW, SimilW, Threshold, Regularization parameter");
+    
+    /*Handling inputs*/
+    A  = (float *) mxGetData(prhs[0]);    /* the image to regularize/filter */
+    SearchW_real  = (int) mxGetScalar(prhs[1]); /* the searching window ratio */
+    SimilW =  (int) mxGetScalar(prhs[2]);  /* the similarity window ratio */
+    h =  (float) mxGetScalar(prhs[3]);  /* parameter for the PB filtering function */
+    lambda = (float) mxGetScalar(prhs[4]); /* regularization parameter */   
+
+    if (h <= 0) mexErrMsgTxt("Parmeter for the PB penalty function should be > 0");
+    if (lambda <= 0) mexErrMsgTxt(" Regularization parmeter should be > 0");
+       
+    SearchW = SearchW_real + 2*SimilW;
+    
+    /* SearchW_full = 2*SearchW + 1; */ /* the full searching window  size */
+    /* SimilW_full = 2*SimilW + 1;  */  /* the full similarity window  size */
+
+    
+    padXY = SearchW + 2*SimilW; /* padding sizes */
+    newsizeX = N + 2*(padXY); /* the X size of the padded array */
+    newsizeY = M + 2*(padXY); /* the Y size of the padded array */
+    newsizeZ = Z + 2*(padXY); /* the Z size of the padded array */
+    int N_dims[] = {newsizeX, newsizeY, newsizeZ};
+    
+    /******************************2D case ****************************/
+    if (numdims == 2) {
+        /*Handling output*/
+        B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
+        /*allocating memory for the padded arrays */
+        Ap = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
+        Bp = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
+        /**************************************************************************/
+        /*Perform padding of image A to the size of [newsizeX * newsizeY] */
+        switchpad_crop = 0; /*padding*/
+        pad_crop(A, Ap, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
+        
+        /* Do PB regularization with the padded array  */
+        PB_FUNC2D(Ap, Bp, newsizeY, newsizeX, padXY, SearchW, SimilW, (float)h, (float)lambda);
+        
+        switchpad_crop = 1; /*cropping*/
+        pad_crop(Bp, B, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
+    }
+    else
+    {
+        /******************************3D case ****************************/
+        /*Handling output*/
+        B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL));
+        /*allocating memory for the padded arrays */
+        Ap = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
+        Bp = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
+        /**************************************************************************/
+        
+        /*Perform padding of image A to the size of [newsizeX * newsizeY * newsizeZ] */
+        switchpad_crop = 0; /*padding*/
+        pad_crop(A, Ap, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
+        
+        /* Do PB regularization with the padded array  */
+        PB_FUNC3D(Ap, Bp, newsizeY, newsizeX, newsizeZ, padXY, SearchW, SimilW, (float)h, (float)lambda);
+        
+        switchpad_crop = 1; /*cropping*/
+        pad_crop(Bp, B, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
+    } /*end else ndims*/ 
+}    
+    
+/*2D version*/
+float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda)
+{
+    int i, j, i_n, j_n, i_m, j_m, i_p, j_p, i_l, j_l, i1, j1, i2, j2, i3, j3, i5,j5, count, SimilW_full;
+    float *Eucl_Vec, h2, denh2, normsum, Weight, Weight_norm, value, denom, WeightGlob, t1;
+                 
+    /*SearchW_full = 2*SearchW + 1; */ /* the full searching window  size */
+    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
+    h2 = h*h;
+    denh2 = 1/(2*h2);   
+    
+     /*Gaussian kernel */
+    Eucl_Vec = (float*) calloc (SimilW_full*SimilW_full,sizeof(float));
+    count = 0;
+    for(i_n=-SimilW; i_n<=SimilW; i_n++) {
+        for(j_n=-SimilW; j_n<=SimilW; j_n++) {
+            t1 = pow(((float)i_n), 2) + pow(((float)j_n), 2);
+            Eucl_Vec[count] = exp(-(t1)/(2*SimilW*SimilW));
+            count = count + 1;                       
+        }} /*main neighb loop */   
+    
+    /*The NLM code starts here*/         
+    /* setting OMP here */
+    #pragma omp parallel for shared (A, B, dimX, dimY, Eucl_Vec, lambda, denh2) private(denom, i, j, WeightGlob, count,  i1, j1, i2, j2, i3, j3, i5, j5, Weight_norm, normsum, i_m, j_m, i_n, j_n, i_l, j_l, i_p, j_p, Weight,  value)
+    
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+             if (((i >= padXY) && (i < dimX-padXY)) &&  ((j >= padXY) && (j < dimY-padXY))) {
+          
+                /* Massive Search window loop */
+                Weight_norm = 0; value = 0.0;
+                for(i_m=-SearchW; i_m<=SearchW; i_m++) {
+                    for(j_m=-SearchW; j_m<=SearchW; j_m++) {
+                       /*checking boundaries*/
+                        i1 = i+i_m; j1 = j+j_m;
+                        
+                        WeightGlob = 0.0;
+                        /* if inside the searching window */                        
+                         for(i_l=-SimilW; i_l<=SimilW; i_l++) {
+                             for(j_l=-SimilW; j_l<=SimilW; j_l++) {
+                                 i2 = i1+i_l; j2 = j1+j_l;
+                                 
+                                 i3 = i+i_l; j3 = j+j_l;   /*coordinates of the inner patch loop */
+                                
+                                 count = 0; normsum = 0.0;
+                                 for(i_p=-SimilW; i_p<=SimilW; i_p++) {
+                                     for(j_p=-SimilW; j_p<=SimilW; j_p++) {
+                                         i5 = i2 + i_p; j5 = j2 + j_p;
+                                         normsum = normsum + Eucl_Vec[count]*pow(A[(i3+i_p)*dimY+(j3+j_p)]-A[i5*dimY+j5], 2);        
+                                         count = count + 1;
+                                     }}
+                                  if (normsum != 0) Weight = (exp(-normsum*denh2)); 
+                                  else Weight = 0.0;
+                                 WeightGlob += Weight;
+                             }}                      
+      
+                         value += A[i1*dimY+j1]*WeightGlob;
+                         Weight_norm += WeightGlob;           
+                    }}      /*search window loop end*/
+                
+                /* the final loop to average all values in searching window with weights */
+                denom = 1 + lambda*Weight_norm;
+                B[i*dimY+j] = (A[i*dimY+j] + lambda*value)/denom;         
+             }
+        }}   /*main loop*/     
+    return (*B);
+    free(Eucl_Vec);    
+}
+ 
+/*3D version*/ 
+ float PB_FUNC3D(float *A, float *B, int dimX, int dimY, int dimZ, int padXY, int SearchW, int SimilW, float h, float lambda)       
+ {
+    int SimilW_full, count, i, j, k,  i_n, j_n, k_n, i_m, j_m, k_m, i_p, j_p, k_p, i_l, j_l, k_l, i1, j1, k1, i2, j2, k2, i3, j3, k3, i5, j5, k5;
+    float *Eucl_Vec, h2, denh2, normsum, Weight, Weight_norm, value, denom, WeightGlob;
+        
+    /*SearchW_full = 2*SearchW + 1; */ /* the full searching window  size */
+    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
+    h2 = h*h;
+    denh2 = 1/(2*h2);
+    
+    /*Gaussian kernel */
+    Eucl_Vec = (float*) calloc (SimilW_full*SimilW_full*SimilW_full,sizeof(float));
+    count = 0;
+    for(i_n=-SimilW; i_n<=SimilW; i_n++) {
+        for(j_n=-SimilW; j_n<=SimilW; j_n++) {
+            for(k_n=-SimilW; k_n<=SimilW; k_n++) {
+                Eucl_Vec[count] = exp(-(pow((float)i_n, 2) + pow((float)j_n, 2) + pow((float)k_n, 2))/(2*SimilW*SimilW*SimilW));
+                count = count + 1;
+            }}} /*main neighb loop */
+    
+    /*The NLM code starts here*/         
+    /* setting OMP here */
+    #pragma omp parallel for shared (A, B, dimX, dimY, dimZ, Eucl_Vec, lambda, denh2) private(denom, i, j, k, WeightGlob,count,  i1, j1, k1, i2, j2, k2, i3, j3, k3, i5, j5, k5, Weight_norm, normsum, i_m, j_m, k_m, i_n, j_n, k_n, i_l, j_l, k_l, i_p, j_p, k_p, Weight, value)    
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+            if (((i >= padXY) && (i < dimX-padXY)) &&  ((j >= padXY) && (j < dimY-padXY)) &&  ((k >= padXY) && (k < dimZ-padXY))) {
+            /* take all elements around the pixel of interest */                             
+               /* Massive Search window loop */
+                Weight_norm = 0;  value = 0.0;
+                for(i_m=-SearchW; i_m<=SearchW; i_m++) {
+                    for(j_m=-SearchW; j_m<=SearchW; j_m++) {
+                        for(k_m=-SearchW; k_m<=SearchW; k_m++) {
+                         /*checking boundaries*/
+                        i1 = i+i_m; j1 = j+j_m; k1 = k+k_m;
+                        
+                        WeightGlob = 0.0;
+                        /* if inside the searching window */                        
+                         for(i_l=-SimilW; i_l<=SimilW; i_l++) {
+                             for(j_l=-SimilW; j_l<=SimilW; j_l++) {
+                                 for(k_l=-SimilW; k_l<=SimilW; k_l++) {                                 
+                                 i2 = i1+i_l; j2 = j1+j_l; k2 = k1+k_l;
+                                 
+                                 i3 = i+i_l; j3 = j+j_l; k3 = k+k_l;   /*coordinates of the inner patch loop */
+                                
+                                 count = 0; normsum = 0.0;
+                                 for(i_p=-SimilW; i_p<=SimilW; i_p++) {
+                                     for(j_p=-SimilW; j_p<=SimilW; j_p++) {
+                                         for(k_p=-SimilW; k_p<=SimilW; k_p++) {
+                                         i5 = i2 + i_p; j5 = j2 + j_p; k5 = k2 + k_p;
+                                         normsum = normsum + Eucl_Vec[count]*pow(A[(dimX*dimY)*(k3+k_p)+(i3+i_p)*dimY+(j3+j_p)]-A[(dimX*dimY)*k5 + i5*dimY+j5], 2);        
+                                         count = count + 1;
+                                     }}}
+                                  if (normsum != 0) Weight = (exp(-normsum*denh2)); 
+                                  else Weight = 0.0;
+                                 WeightGlob += Weight;
+                             }}}                                                 
+                         value += A[(dimX*dimY)*k1 + i1*dimY+j1]*WeightGlob;
+                         Weight_norm += WeightGlob;
+             
+                    }}}      /*search window loop end*/
+                
+                /* the final loop to average all values in searching window with weights */
+                denom = 1 + lambda*Weight_norm;               
+                B[(dimX*dimY)*k + i*dimY+j] = (A[(dimX*dimY)*k + i*dimY+j] + lambda*value)/denom;      
+            }            
+        }}}   /*main loop*/              
+       free(Eucl_Vec);        
+       return *B;
+}
+
+float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop)
+{
+    /* padding-cropping function */
+    int i,j,k;    
+    if (NewSizeZ > 1) {    
+           for (i=0; i < NewSizeX; i++) {
+            for (j=0; j < NewSizeY; j++) {
+              for (k=0; k < NewSizeZ; k++) {
+                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY)) &&  ((k >= padXY) && (k < NewSizeZ-padXY))) {
+                    if (switchpad_crop == 0)  Ap[NewSizeX*NewSizeY*k + i*NewSizeY+j] = A[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)];
+                    else  Ap[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)] = A[NewSizeX*NewSizeY*k + i*NewSizeY+j];
+                }
+            }}}   
+    }
+    else {
+        for (i=0; i < NewSizeX; i++) {
+            for (j=0; j < NewSizeY; j++) {
+                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY))) {
+                    if (switchpad_crop == 0)  Ap[i*NewSizeY+j] = A[(i-padXY)*(OldSizeY)+(j-padXY)];
+                    else  Ap[(i-padXY)*(OldSizeY)+(j-padXY)] = A[i*NewSizeY+j];
+                }
+            }}
+    }
+    return *Ap;
+}
+\ No newline at end of file
diff --git a/main_func/SplitBregman_TV.c b/main_func/regularizers_CPU/SplitBregman_TV.c
index f143aa6..f143aa6 100644
--- a/main_func/SplitBregman_TV.c
+++ b/main_func/regularizers_CPU/SplitBregman_TV.c
diff --git a/main_func/regularizers_CPU/TGV_PD.c b/main_func/regularizers_CPU/TGV_PD.c
new file mode 100644
index 0000000..41f8615
--- /dev/null
+++ b/main_func/regularizers_CPU/TGV_PD.c
@@ -0,0 +1,353 @@
+#include "mex.h"
+#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+
+/* C-OMP implementation of Primal-Dual denoising method for 
+ * Total Generilized Variation (TGV)-L2 model (2D case only)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume (2D)
+ * 2. lambda - regularization parameter
+ * 3. parameter to control first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of CP iterations
+ *
+ * Output:
+ * Filtered/regularized image 
+ *
+ * Example:
+ * figure;
+ * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+ * u0 = Im + .03*randn(size(Im)); % adding noise
+ * tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc;
+ *
+ * to compile with OMP support: mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+ * References:
+ * K. Bredies "Total Generalized Variation"
+ *
+ * 28.11.16/Harwell
+ */
+ 
+/* 2D functions */
+float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma);
+float ProjP_2D(float *P1, float *P2, int dimX, int dimY, int dimZ, float alpha1);
+float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float sigma);
+float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float alpha0);
+float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, int dimZ, float lambda, float tau);
+float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau);
+/*3D functions*/
+float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma);
+
+float newU(float *U, float *U_old, int dimX, int dimY, int dimZ);
+float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+        
+{
+    int number_of_dims, iter, dimX, dimY, dimZ, ll;
+    const int  *dim_array;
+    float *A, *U, *U_old, *P1, *P2, *P3, *Q1, *Q2, *Q3, *Q4, *Q5, *Q6, *Q7, *Q8, *Q9, *V1, *V1_old, *V2, *V2_old, *V3, *V3_old, lambda, L2, tau, sigma,  alpha1, alpha0;
+    
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    
+    /*Handling Matlab input data*/
+    A  = (float *) mxGetData(prhs[0]); /*origanal noise image/volume*/
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
+    lambda =  (float) mxGetScalar(prhs[1]); /*regularization parameter*/
+    alpha1 =  (float) mxGetScalar(prhs[2]); /*first-order term*/
+    alpha0 =  (float) mxGetScalar(prhs[3]); /*second-order term*/
+    iter =  (int) mxGetScalar(prhs[4]); /*iterations number*/
+    if(nrhs != 5) mexErrMsgTxt("Five input parameters is reqired: Image(2D/3D), Regularization parameter, alpha1, alpha0, Iterations");
+    
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1];
+    
+    if (number_of_dims == 2) {
+        /*2D case*/
+        dimZ = 1;
+        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        /*dual variables*/
+        P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        Q1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        Q2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        Q3 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        V1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        V1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        V2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        V2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));   }
+    else if (number_of_dims == 3) {
+        mexErrMsgTxt("The input data should be 2D");
+        /*3D case*/
+//         dimZ = dim_array[2];
+//         U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         
+//         P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         
+//         Q1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         Q2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         Q3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         Q4 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         Q5 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         Q6 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         Q7 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         Q8 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         Q9 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         
+//         U_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         
+//         V1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         V1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         V2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         V2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         V3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+//         V3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));	  
+    }
+    else {mexErrMsgTxt("The input data should be 2D");}
+    
+    
+    /*printf("%i \n", i);*/
+    L2 = 12.0; /*Lipshitz constant*/
+    tau = 1.0/pow(L2,0.5);
+    sigma = 1.0/pow(L2,0.5);
+    
+    /*Copy A to U*/
+    copyIm(A, U, dimX, dimY, dimZ);
+    
+    if (number_of_dims == 2) {
+        /* Here primal-dual iterations begin for 2D */
+        for(ll = 0; ll < iter; ll++) {
+            
+            /* Calculate Dual Variable P */
+            DualP_2D(U, V1, V2, P1, P2, dimX, dimY, dimZ, sigma);
+            
+            /*Projection onto convex set for P*/
+            ProjP_2D(P1, P2, dimX, dimY, dimZ, alpha1);
+            
+            /* Calculate Dual Variable Q */
+            DualQ_2D(V1, V2, Q1, Q2, Q3, dimX, dimY, dimZ, sigma);
+            
+            /*Projection onto convex set for Q*/
+            ProjQ_2D(Q1, Q2, Q3, dimX, dimY, dimZ, alpha0);
+            
+            /*saving U into U_old*/
+            copyIm(U, U_old, dimX, dimY, dimZ);
+            
+            /*adjoint operation  -> divergence and projection of P*/
+            DivProjP_2D(U, A, P1, P2, dimX, dimY, dimZ, lambda, tau);
+            
+            /*get updated solution U*/
+            newU(U, U_old, dimX, dimY, dimZ);
+            
+            /*saving V into V_old*/
+            copyIm(V1, V1_old, dimX, dimY, dimZ);
+            copyIm(V2, V2_old, dimX, dimY, dimZ);
+            
+            /* upd V*/
+            UpdV_2D(V1, V2, P1, P2, Q1, Q2, Q3, dimX, dimY, dimZ, tau);
+            
+            /*get new V*/
+            newU(V1, V1_old, dimX, dimY, dimZ);
+            newU(V2, V2_old, dimX, dimY, dimZ);
+        } /*end of iterations*/
+    }
+    
+//     /*3D version*/
+//     if (number_of_dims == 3) {
+//         /* Here primal-dual iterations begin for 3D */
+//         for(ll = 0; ll < iter; ll++) {
+//             
+//             /* Calculate Dual Variable P */
+//             DualP_3D(U, V1, V2, V3, P1, P2, P3, dimX, dimY, dimZ, sigma);
+//             
+//             /*Projection onto convex set for P*/
+//             ProjP_3D(P1, P2, P3, dimX, dimY, dimZ, alpha1);
+//             
+//             /* Calculate Dual Variable Q */
+//             DualQ_3D(V1, V2, V2, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, dimX, dimY, dimZ, sigma);
+//             
+//         } /*end of iterations*/
+//     }
+}
+
+/*Calculating dual variable P (using forward differences)*/
+float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma)
+{
+    int i,j;
+#pragma omp parallel for shared(U,V1,V2,P1,P2) private(i,j)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            if (i == dimX-1) P1[i*dimY + (j)] = P1[i*dimY + (j)] + sigma*((U[(i-1)*dimY + (j)] - U[i*dimY + (j)])  - V1[i*dimY + (j)]);
+            else P1[i*dimY + (j)] = P1[i*dimY + (j)] + sigma*((U[(i + 1)*dimY + (j)] - U[i*dimY + (j)])  - V1[i*dimY + (j)]);
+            if (j == dimY-1) P2[i*dimY + (j)] = P2[i*dimY + (j)] + sigma*((U[(i)*dimY + (j-1)] - U[i*dimY + (j)])  - V2[i*dimY + (j)]);
+            else  P2[i*dimY + (j)] = P2[i*dimY + (j)] + sigma*((U[(i)*dimY + (j+1)] - U[i*dimY + (j)])  - V2[i*dimY + (j)]);
+        }}
+    return 1;
+}
+/*Projection onto convex set for P*/
+float ProjP_2D(float *P1, float *P2, int dimX, int dimY, int dimZ, float alpha1)
+{
+    float grad_magn;
+    int i,j;
+#pragma omp parallel for shared(P1,P2) private(i,j,grad_magn)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            grad_magn = sqrt(pow(P1[i*dimY + (j)],2) + pow(P2[i*dimY + (j)],2));
+            grad_magn = grad_magn/alpha1;
+            if (grad_magn > 1.0) {
+                P1[i*dimY + (j)] = P1[i*dimY + (j)]/grad_magn;
+                P2[i*dimY + (j)] = P2[i*dimY + (j)]/grad_magn;
+            }
+        }}
+    return 1;
+}
+/*Calculating dual variable Q (using forward differences)*/
+float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float sigma)
+{
+    int i,j;
+    float q1, q2, q11, q22;
+#pragma omp parallel for shared(Q1,Q2,Q3,V1,V2) private(i,j,q1,q2,q11,q22)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            if (i == dimX-1)
+            { q1 = (V1[(i-1)*dimY + (j)] - V1[i*dimY + (j)]);
+              q11 = (V2[(i-1)*dimY + (j)] - V2[i*dimY + (j)]);
+            }
+            else {
+                q1 = (V1[(i+1)*dimY + (j)] - V1[i*dimY + (j)]);
+                q11 = (V2[(i+1)*dimY + (j)] - V2[i*dimY + (j)]);
+            }
+            if (j == dimY-1) {
+                q2 = (V2[(i)*dimY + (j-1)] - V2[i*dimY + (j)]);
+                q22 = (V1[(i)*dimY + (j-1)] - V1[i*dimY + (j)]);
+            }
+            else {
+                q2 = (V2[(i)*dimY + (j+1)] - V2[i*dimY + (j)]);
+                q22 = (V1[(i)*dimY + (j+1)] - V1[i*dimY + (j)]);
+            }
+            Q1[i*dimY + (j)] = Q1[i*dimY + (j)] + sigma*(q1);
+            Q2[i*dimY + (j)] = Q2[i*dimY + (j)] + sigma*(q2);
+            Q3[i*dimY + (j)] = Q3[i*dimY + (j)]  + sigma*(0.5f*(q11 + q22));
+        }}
+    return 1;
+}
+
+float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float alpha0)
+{
+    float grad_magn;
+    int i,j;
+#pragma omp parallel for shared(Q1,Q2,Q3) private(i,j,grad_magn)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            grad_magn = sqrt(pow(Q1[i*dimY + (j)],2) + pow(Q2[i*dimY + (j)],2) + 2*pow(Q3[i*dimY + (j)],2));
+            grad_magn = grad_magn/alpha0;
+            if (grad_magn > 1.0) {
+                Q1[i*dimY + (j)] = Q1[i*dimY + (j)]/grad_magn;
+                Q2[i*dimY + (j)] = Q2[i*dimY + (j)]/grad_magn;
+                Q3[i*dimY + (j)] = Q3[i*dimY + (j)]/grad_magn;
+            }
+        }}
+    return 1;
+}
+/* Divergence and projection for P*/
+float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, int dimZ, float lambda, float tau)
+{
+    int i,j;
+    float P_v1, P_v2, div;
+#pragma omp parallel for shared(U,A,P1,P2) private(i,j,P_v1,P_v2,div)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            if (i == 0) P_v1 = (P1[i*dimY + (j)]);
+            else P_v1 = (P1[i*dimY + (j)] - P1[(i-1)*dimY + (j)]);
+            if (j == 0) P_v2 = (P2[i*dimY + (j)]);
+            else  P_v2 = (P2[i*dimY + (j)] - P2[(i)*dimY + (j-1)]);
+            div = P_v1 + P_v2;
+            U[i*dimY + (j)] = (lambda*(U[i*dimY + (j)] + tau*div) + tau*A[i*dimY + (j)])/(lambda + tau);
+        }}
+    return *U;
+}
+/*get updated solution U*/
+float newU(float *U, float *U_old, int dimX, int dimY, int dimZ)
+{
+    int i;
+#pragma omp parallel for shared(U,U_old) private(i)
+    for(i=0; i<dimX*dimY*dimZ; i++) U[i] = 2*U[i] - U_old[i];
+    return *U;
+}
+
+/*get update for V*/
+float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau)
+{
+    int i,j;
+    float q1, q11, q2, q22, div1, div2;
+#pragma omp parallel for shared(V1,V2,P1,P2,Q1,Q2,Q3) private(i,j, q1, q11, q2, q22, div1, div2)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            if (i == 0) {
+                q1 = (Q1[i*dimY + (j)]);
+                q11 = (Q3[i*dimY + (j)]);
+            }
+            else {
+                q1 = (Q1[i*dimY + (j)] - Q1[(i-1)*dimY + (j)]);
+                q11 = (Q3[i*dimY + (j)] - Q3[(i-1)*dimY + (j)]);
+            }
+            if (j == 0) {
+                q2 = (Q2[i*dimY + (j)]);
+                q22 = (Q3[i*dimY + (j)]);
+            }
+            else  {
+                q2 = (Q2[i*dimY + (j)] - Q2[(i)*dimY + (j-1)]);
+                q22 = (Q3[i*dimY + (j)] - Q3[(i)*dimY + (j-1)]);
+            }
+            div1 = q1 + q22;
+            div2 = q2 + q11;
+            V1[i*dimY + (j)] = V1[i*dimY + (j)] + tau*(P1[i*dimY + (j)] + div1);
+            V2[i*dimY + (j)] = V2[i*dimY + (j)] + tau*(P2[i*dimY + (j)] + div2);
+        }}
+    return 1;
+}
+/* Copy Image */
+float copyIm(float *A, float *U, int dimX, int dimY, int dimZ)
+{
+    int j;
+#pragma omp parallel for shared(A, U) private(j)
+    for(j=0; j<dimX*dimY*dimZ; j++)  U[j] = A[j];
+    return *U;
+}
+/*********************3D *********************/
+
+/*Calculating dual variable P (using forward differences)*/
+float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma)
+{
+    int i,j,k;
+#pragma omp parallel for shared(U,V1,V2,V3,P1,P2,P3) private(i,j,k)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                /* symmetric boundary conditions (Neuman) */
+                if (i == dimX-1) P1[dimX*dimY*k + i*dimY + (j)] = P1[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i-1)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V1[dimX*dimY*k + i*dimY + (j)]);
+                else P1[dimX*dimY*k + i*dimY + (j)] = P1[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i + 1)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V1[dimX*dimY*k + i*dimY + (j)]);
+                if (j == dimY-1) P2[dimX*dimY*k + i*dimY + (j)] = P2[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i)*dimY + (j-1)] - U[dimX*dimY*k + i*dimY + (j)])  - V2[dimX*dimY*k + i*dimY + (j)]);
+                else  P2[dimX*dimY*k + i*dimY + (j)] = P2[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i)*dimY + (j+1)] - U[dimX*dimY*k + i*dimY + (j)])  - V2[dimX*dimY*k + i*dimY + (j)]);
+                if (k == dimZ-1) P3[dimX*dimY*k + i*dimY + (j)] = P3[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*(k-1) + (i)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V3[dimX*dimY*k + i*dimY + (j)]);
+                else  P3[dimX*dimY*k + i*dimY + (j)] = P3[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*(k+1) + (i)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V3[dimX*dimY*k + i*dimY + (j)]);
+            }}}
+    return 1;
+}
+\ No newline at end of file