10 files changed, 1266 insertions, 1271 deletions
diff --git a/src/Core/regularisers_CPU/Diffus4th_order_core.c b/src/Core/regularisers_CPU/Diffus4th_order_core.c
index 28ac8a9..840b736 100644
--- a/src/Core/regularisers_CPU/Diffus4th_order_core.c
+++ b/src/Core/regularisers_CPU/Diffus4th_order_core.c
@@ -50,35 +50,35 @@ float Diffus4th_CPU_main(float *Input, float *Output, float *infovector, float l
     float *W_Lapl=NULL, *Output_prev=NULL;
     sigmaPar2 = sigmaPar*sigmaPar;
     DimTotal =  dimX*dimY*dimZ;
-
+    
     W_Lapl = calloc(DimTotal, sizeof(float));
-
+    
     if (epsil != 0.0f) Output_prev = calloc(DimTotal, sizeof(float));
-
+    
     /* copy into output */
     copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
-
+    
     for(i=0; i < iterationsNumb; i++) {
-      if ((epsil != 0.0f) && (i % 5 == 0)) copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-
-      if (dimZ == 1) {
+        if ((epsil != 0.0f) && (i % 5 == 0)) copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+        
+        if (dimZ == 1) {
             /* running 2D diffusion iterations */
             /* Calculating weighted Laplacian */
             Weighted_Laplc2D(W_Lapl, Output, sigmaPar2, dimX, dimY);
             /* Perform iteration step */
             Diffusion_update_step2D(Output, Input, W_Lapl, lambdaPar, sigmaPar2, tau, (long)(dimX), (long)(dimY));
-		        }
-      else {
+        }
+        else {
             /* running 3D diffusion iterations */
             /* Calculating weighted Laplacian */
             Weighted_Laplc3D(W_Lapl, Output, sigmaPar2, dimX, dimY, dimZ);
             /* Perform iteration step */
             Diffusion_update_step3D(Output, Input, W_Lapl, lambdaPar, sigmaPar2, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
-          }
-
-          /* check early stopping criteria */
-          if ((epsil != 0.0f) && (i % 5 == 0)) {
-          re = 0.0f; re1 = 0.0f;
+        }
+        
+        /* check early stopping criteria */
+        if ((epsil != 0.0f) && (i % 5 == 0)) {
+            re = 0.0f; re1 = 0.0f;
             for(j=0; j<DimTotal; j++)
             {
                 re += powf(Output[j] - Output_prev[j],2);
@@ -87,10 +87,10 @@ float Diffus4th_CPU_main(float *Input, float *Output, float *infovector, float l
             re = sqrtf(re)/sqrtf(re1);
             if (re < epsil)  count++;
             if (count > 3) break;
-          }
-	   }
-	  free(W_Lapl);
-
+        }
+    }
+    free(W_Lapl);
+    
     if (epsil != 0.0f) free(Output_prev);
     /*adding info into info_vector */
     infovector[0] = (float)(i);  /*iterations number (if stopped earlier based on tolerance)*/
@@ -104,74 +104,71 @@ float Weighted_Laplc2D(float *W_Lapl, float *U0, float sigma, long dimX, long di
 {
     long i,j,i1,i2,j1,j2,index;
     float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq;
-
-        #pragma omp parallel for shared(W_Lapl) private(i,j,i1,i2,j1,j2,index,gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq)
+    
+#pragma omp parallel for shared(W_Lapl) private(i,j,i1,i2,j1,j2,index,gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq)
+    for(j=0; j<dimY; j++) {
+        /* symmetric boundary conditions */
+        j1 = j+1; if (j1 == dimY) j1 = j-1;
+        j2 = j-1; if (j2 < 0) j2 = j+1;
         for(i=0; i<dimX; i++) {
-			 /* symmetric boundary conditions */
-			i1 = i+1; if (i1 == dimX) i1 = i-1;
-			i2 = i-1; if (i2 < 0) i2 = i+1;
-            for(j=0; j<dimY; j++) {
-				 /* symmetric boundary conditions */
-				j1 = j+1; if (j1 == dimY) j1 = j-1;
-				j2 = j-1; if (j2 < 0) j2 = j+1;
-
-				index = j*dimX+i;
-
-				gradX = 0.5f*(U0[j*dimX+i2] - U0[j*dimX+i1]);
-				gradX_sq = pow(gradX,2);
-
-				gradY = 0.5f*(U0[j2*dimX+i] - U0[j1*dimX+i]);
-                gradY_sq = pow(gradY,2);
-
-                gradXX = U0[j*dimX+i2] + U0[j*dimX+i1] - 2*U0[index];
-                gradYY = U0[j2*dimX+i] + U0[j1*dimX+i] - 2*U0[index];
-
-                gradXY = 0.25f*(U0[j2*dimX+i2] + U0[j1*dimX+i1] - U0[j1*dimX+i2] - U0[j2*dimX+i1]);
-                xy_2 = 2.0f*gradX*gradY*gradXY;
-
-                denom =  gradX_sq + gradY_sq;
-
-                if (denom <= EPS) {
-                    V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/EPS;
-                    V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/EPS;
-                    }
-                else  {
-                    V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/denom;
-                    V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/denom;
-                    }
-
-                c = 1.0f/(1.0f + denom/sigma);
-                c_sq = c*c;
-
-                W_Lapl[index] = c_sq*V_norm + c*V_orth;
+            /* symmetric boundary conditions */
+            i1 = i+1; if (i1 == dimX) i1 = i-1;
+            i2 = i-1; if (i2 < 0) i2 = i+1;
+            index = j*dimX+i;
+            
+            gradX = 0.5f*(U0[j*dimX+i2] - U0[j*dimX+i1]);
+            gradX_sq = pow(gradX,2);
+            
+            gradY = 0.5f*(U0[j2*dimX+i] - U0[j1*dimX+i]);
+            gradY_sq = pow(gradY,2);
+            
+            gradXX = U0[j*dimX+i2] + U0[j*dimX+i1] - 2*U0[index];
+            gradYY = U0[j2*dimX+i] + U0[j1*dimX+i] - 2*U0[index];
+            
+            gradXY = 0.25f*(U0[j2*dimX+i2] + U0[j1*dimX+i1] - U0[j1*dimX+i2] - U0[j2*dimX+i1]);
+            xy_2 = 2.0f*gradX*gradY*gradXY;
+            
+            denom =  gradX_sq + gradY_sq;
+            
+            if (denom <= EPS) {
+                V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/EPS;
+                V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/EPS;
             }
-        }
-        return *W_Lapl;
+            else  {
+                V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/denom;
+                V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/denom;
+            }
+            
+            c = 1.0f/(1.0f + denom/sigma);
+            c_sq = c*c;
+            
+            W_Lapl[index] = c_sq*V_norm + c*V_orth;
+        }}
+    return *W_Lapl;
 }
 
 float Diffusion_update_step2D(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, long dimX, long dimY)
 {
-	long i,j,i1,i2,j1,j2,index;
+    long i,j,i1,i2,j1,j2,index;
     float gradXXc, gradYYc;
-
-            #pragma omp parallel for shared(Output, Input, W_Lapl) private(i,j,i1,i2,j1,j2,index,gradXXc,gradYYc)
+    
+#pragma omp parallel for shared(Output, Input, W_Lapl) private(i,j,i1,i2,j1,j2,index,gradXXc,gradYYc)
+    for(j=0; j<dimY; j++) {
+        /* symmetric boundary conditions */
+        j1 = j+1; if (j1 == dimY) j1 = j-1;
+        j2 = j-1; if (j2 < 0) j2 = j+1;
         for(i=0; i<dimX; i++) {
-			 /* symmetric boundary conditions */
-			i1 = i+1; if (i1 == dimX) i1 = i-1;
-			i2 = i-1; if (i2 < 0) i2 = i+1;
-            for(j=0; j<dimY; j++) {
-				 /* symmetric boundary conditions */
-				j1 = j+1; if (j1 == dimY) j1 = j-1;
-				j2 = j-1; if (j2 < 0) j2 = j+1;
-					index = j*dimX+i;
-
-                    gradXXc = W_Lapl[j*dimX+i2] + W_Lapl[j*dimX+i1] - 2*W_Lapl[index];
-                    gradYYc = W_Lapl[j2*dimX+i] + W_Lapl[j1*dimX+i] - 2*W_Lapl[index];
-
-                    Output[index] += tau*(-lambdaPar*(gradXXc + gradYYc) - (Output[index] - Input[index]));
-                }
-            }
-	return *Output;
+            /* symmetric boundary conditions */
+            i1 = i+1; if (i1 == dimX) i1 = i-1;
+            i2 = i-1; if (i2 < 0) i2 = i+1;
+            index = j*dimX+i;
+            
+            gradXXc = W_Lapl[j*dimX+i2] + W_Lapl[j*dimX+i1] - 2*W_Lapl[index];
+            gradYYc = W_Lapl[j2*dimX+i] + W_Lapl[j1*dimX+i] - 2*W_Lapl[index];
+            
+            Output[index] += tau*(-lambdaPar*(gradXXc + gradYYc) - (Output[index] - Input[index]));
+        }}
+    return *Output;
 }
 /********************************************************************/
 /***************************3D Functions*****************************/
@@ -180,95 +177,89 @@ float Weighted_Laplc3D(float *W_Lapl, float *U0, float sigma, long dimX, long di
 {
     long i,j,k,i1,i2,j1,j2,k1,k2,index;
     float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq, gradZ, gradZ_sq, gradZZ, gradXZ, gradYZ, xyz_1, xyz_2;
-
-        #pragma omp parallel for shared(W_Lapl) private(i,j,k,i1,i2,j1,j2,k1,k2,index,gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq, gradZ, gradZ_sq, gradZZ, gradXZ, gradYZ, xyz_1, xyz_2)
-        for(i=0; i<dimX; i++) {
-			 /* symmetric boundary conditions */
-			i1 = i+1; if (i1 == dimX) i1 = i-1;
-			i2 = i-1; if (i2 < 0) i2 = i+1;
-            for(j=0; j<dimY; j++) {
-				/* symmetric boundary conditions */
-				j1 = j+1; if (j1 == dimY) j1 = j-1;
-				j2 = j-1; if (j2 < 0) j2 = j+1;
-
-				for(k=0; k<dimZ; k++) {
-				/* symmetric boundary conditions */
-				k1 = k+1; if (k1 == dimZ) k1 = k-1;
-				k2 = k-1; if (k2 < 0) k2 = k+1;
-
-				index = (dimX*dimY)*k + j*dimX+i;
-
-				gradX = 0.5f*(U0[(dimX*dimY)*k + j*dimX+i2] - U0[(dimX*dimY)*k + j*dimX+i1]);
-				gradX_sq = pow(gradX,2);
-
-				gradY = 0.5f*(U0[(dimX*dimY)*k + j2*dimX+i] - U0[(dimX*dimY)*k + j1*dimX+i]);
+    
+#pragma omp parallel for shared(W_Lapl) private(i,j,k,i1,i2,j1,j2,k1,k2,index,gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq, gradZ, gradZ_sq, gradZZ, gradXZ, gradYZ, xyz_1, xyz_2)
+    for(k=0; k<dimZ; k++) {
+        /* symmetric boundary conditions */
+        k1 = k+1; if (k1 == dimZ) k1 = k-1;
+        k2 = k-1; if (k2 < 0) k2 = k+1;
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions */
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            j2 = j-1; if (j2 < 0) j2 = j+1;
+            for(i=0; i<dimX; i++) {
+                /* symmetric boundary conditions */
+                i1 = i+1; if (i1 == dimX) i1 = i-1;
+                i2 = i-1; if (i2 < 0) i2 = i+1;
+                
+                index = (dimX*dimY)*k + j*dimX+i;
+                
+                gradX = 0.5f*(U0[(dimX*dimY)*k + j*dimX+i2] - U0[(dimX*dimY)*k + j*dimX+i1]);
+                gradX_sq = pow(gradX,2);
+                
+                gradY = 0.5f*(U0[(dimX*dimY)*k + j2*dimX+i] - U0[(dimX*dimY)*k + j1*dimX+i]);
                 gradY_sq = pow(gradY,2);
-
+                
                 gradZ = 0.5f*(U0[(dimX*dimY)*k2 + j*dimX+i] - U0[(dimX*dimY)*k1 + j*dimX+i]);
                 gradZ_sq = pow(gradZ,2);
-
+                
                 gradXX = U0[(dimX*dimY)*k + j*dimX+i2] + U0[(dimX*dimY)*k + j*dimX+i1] - 2*U0[index];
                 gradYY = U0[(dimX*dimY)*k + j2*dimX+i] + U0[(dimX*dimY)*k + j1*dimX+i] - 2*U0[index];
                 gradZZ = U0[(dimX*dimY)*k2 + j*dimX+i] + U0[(dimX*dimY)*k1 + j*dimX+i] - 2*U0[index];
-
+                
                 gradXY = 0.25f*(U0[(dimX*dimY)*k + j2*dimX+i2] + U0[(dimX*dimY)*k + j1*dimX+i1] - U0[(dimX*dimY)*k + j1*dimX+i2] - U0[(dimX*dimY)*k + j2*dimX+i1]);
                 gradXZ = 0.25f*(U0[(dimX*dimY)*k2 + j*dimX+i2] - U0[(dimX*dimY)*k2+j*dimX+i1] - U0[(dimX*dimY)*k1+j*dimX+i2] + U0[(dimX*dimY)*k1+j*dimX+i1]);
                 gradYZ = 0.25f*(U0[(dimX*dimY)*k2 +j2*dimX+i] - U0[(dimX*dimY)*k2+j1*dimX+i] - U0[(dimX*dimY)*k1+j2*dimX+i] + U0[(dimX*dimY)*k1+j1*dimX+i]);
-
+                
                 xy_2  = 2.0f*gradX*gradY*gradXY;
                 xyz_1 = 2.0f*gradX*gradZ*gradXZ;
                 xyz_2 = 2.0f*gradY*gradZ*gradYZ;
-
+                
                 denom =  gradX_sq + gradY_sq + gradZ_sq;
-
-					if (denom <= EPS) {
-					V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/EPS;
+                
+                if (denom <= EPS) {
+                    V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/EPS;
                     V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/EPS;
-					}
-					else  {
-					V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/denom;
+                }
+                else  {
+                    V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/denom;
                     V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/denom;
-					}
-
+                }
+                
                 c = 1.0f/(1.0f + denom/sigma);
                 c_sq = c*c;
-
+                
                 W_Lapl[index] = c_sq*V_norm + c*V_orth;
-				}
-            }
-        }
-        return *W_Lapl;
+            }}}
+    return *W_Lapl;
 }
 
 float Diffusion_update_step3D(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, long dimX, long dimY, long dimZ)
 {
-	long i,j,i1,i2,j1,j2,index,k,k1,k2;
+    long i,j,i1,i2,j1,j2,index,k,k1,k2;
     float gradXXc, gradYYc, gradZZc;
-
-        #pragma omp parallel for shared(Output, Input, W_Lapl) private(i,j,i1,i2,j1,j2,k,k1,k2,index,gradXXc,gradYYc,gradZZc)
-        for(i=0; i<dimX; i++) {
-			 /* symmetric boundary conditions */
-			i1 = i+1; if (i1 == dimX) i1 = i-1;
-			i2 = i-1; if (i2 < 0) i2 = i+1;
-            for(j=0; j<dimY; j++) {
-				/* symmetric boundary conditions */
-				j1 = j+1; if (j1 == dimY) j1 = j-1;
-				j2 = j-1; if (j2 < 0) j2 = j+1;
-
-				for(k=0; k<dimZ; k++) {
-				/* symmetric boundary conditions */
-				k1 = k+1; if (k1 == dimZ) k1 = k-1;
-				k2 = k-1; if (k2 < 0) k2 = k+1;
-
-				index = (dimX*dimY)*k + j*dimX+i;
-
-                    gradXXc = W_Lapl[(dimX*dimY)*k + j*dimX+i2] + W_Lapl[(dimX*dimY)*k + j*dimX+i1] - 2*W_Lapl[index];
-                    gradYYc = W_Lapl[(dimX*dimY)*k + j2*dimX+i] + W_Lapl[(dimX*dimY)*k + j1*dimX+i] - 2*W_Lapl[index];
-                    gradZZc = W_Lapl[(dimX*dimY)*k2 + j*dimX+i] + W_Lapl[(dimX*dimY)*k1 + j*dimX+i] - 2*W_Lapl[index];
-
-                    Output[index] += tau*(-lambdaPar*(gradXXc + gradYYc + gradZZc) - (Output[index] - Input[index]));
-                }
-            }
-		}
-	return *Output;
+    
+#pragma omp parallel for shared(Output, Input, W_Lapl) private(i,j,i1,i2,j1,j2,k,k1,k2,index,gradXXc,gradYYc,gradZZc)
+    for(k=0; k<dimZ; k++) {
+        /* symmetric boundary conditions */
+        k1 = k+1; if (k1 == dimZ) k1 = k-1;
+        k2 = k-1; if (k2 < 0) k2 = k+1;
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions */
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            j2 = j-1; if (j2 < 0) j2 = j+1;
+            for(i=0; i<dimX; i++) {
+                /* symmetric boundary conditions */
+                i1 = i+1; if (i1 == dimX) i1 = i-1;
+                i2 = i-1; if (i2 < 0) i2 = i+1;
+                
+                index = (dimX*dimY)*k + j*dimX+i;
+                
+                gradXXc = W_Lapl[(dimX*dimY)*k + j*dimX+i2] + W_Lapl[(dimX*dimY)*k + j*dimX+i1] - 2*W_Lapl[index];
+                gradYYc = W_Lapl[(dimX*dimY)*k + j2*dimX+i] + W_Lapl[(dimX*dimY)*k + j1*dimX+i] - 2*W_Lapl[index];
+                gradZZc = W_Lapl[(dimX*dimY)*k2 + j*dimX+i] + W_Lapl[(dimX*dimY)*k1 + j*dimX+i] - 2*W_Lapl[index];
+                
+                Output[index] += tau*(-lambdaPar*(gradXXc + gradYYc + gradZZc) - (Output[index] - Input[index]));
+            }}}
+    return *Output;
 }
diff --git a/src/Core/regularisers_CPU/Diffusion_core.c b/src/Core/regularisers_CPU/Diffusion_core.c
index 7f06dd8..f0039c7 100644
--- a/src/Core/regularisers_CPU/Diffusion_core.c
+++ b/src/Core/regularisers_CPU/Diffusion_core.c
@@ -40,7 +40,7 @@ int signNDFc(float x) {
  * 5. tau - time-marching step for explicit scheme
  * 6. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
  * 7. eplsilon - tolerance constant
-
+ *
  * Output:
  * [1] Filtered/regularized image/volume
  * [2] Information vector which contains [iteration no., reached tolerance]
@@ -60,149 +60,148 @@ float Diffusion_CPU_main(float *Input, float *Output, float *infovector, float l
     re = 0.0f; re1 = 0.0f;
     int count = 0;
     DimTotal = (long)(dimX*dimY*dimZ);
-
+    
     if (epsil != 0.0f) Output_prev = calloc(DimTotal, sizeof(float));
-
+    
     /* copy into output */
     copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
-
+    
     for(i=0; i < iterationsNumb; i++) {
-
-      if ((epsil != 0.0f)  && (i % 5 == 0)) copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-      if (dimZ == 1) {
+        
+        if ((epsil != 0.0f)  && (i % 5 == 0)) copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+        if (dimZ == 1) {
             /* running 2D diffusion iterations */
             if (sigmaPar == 0.0f) LinearDiff2D(Input, Output, lambdaPar, tau, (long)(dimX), (long)(dimY)); /* linear diffusion (heat equation) */
             else NonLinearDiff2D(Input, Output, lambdaPar, sigmaPar2, tau, penaltytype, (long)(dimX), (long)(dimY)); /* nonlinear diffusion */
-          }
-      else {
-        	/* running 3D diffusion iterations */
-        if (sigmaPar == 0.0f) LinearDiff3D(Input, Output, lambdaPar, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
-        else NonLinearDiff3D(Input, Output, lambdaPar, sigmaPar2, tau, penaltytype, (long)(dimX), (long)(dimY), (long)(dimZ));
-          }
-          /* check early stopping criteria if epsilon not equal zero */
-          if ((epsil != 0.0f)  && (i % 5 == 0)) {
-          re = 0.0f; re1 = 0.0f;
+        }
+        else {
+            /* running 3D diffusion iterations */
+            if (sigmaPar == 0.0f) LinearDiff3D(Input, Output, lambdaPar, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
+            else NonLinearDiff3D(Input, Output, lambdaPar, sigmaPar2, tau, penaltytype, (long)(dimX), (long)(dimY), (long)(dimZ));
+        }
+        /* check early stopping criteria if epsilon not equal zero */
+        if ((epsil != 0.0f)  && (i % 5 == 0)) {
+            re = 0.0f; re1 = 0.0f;
             for(j=0; j<DimTotal; j++)
             {
                 re += powf(Output[j] - Output_prev[j],2);
                 re1 += powf(Output[j],2);
             }
-          re = sqrtf(re)/sqrtf(re1);
-          /* stop if the norm residual is less than the tolerance EPS */
-          if (re < epsil)  count++;
-          if (count > 3) break;
-          }
-		}
-
+            re = sqrtf(re)/sqrtf(re1);
+            /* stop if the norm residual is less than the tolerance EPS */
+            if (re < epsil)  count++;
+            if (count > 3) break;
+        }
+    }
+    
     free(Output_prev);
-  /*adding info into info_vector */
+    /*adding info into info_vector */
     infovector[0] = (float)(i);  /*iterations number (if stopped earlier based on tolerance)*/
     infovector[1] = re;  /* reached tolerance */
     return 0;
 }
 
-
 /********************************************************************/
 /***************************2D Functions*****************************/
 /********************************************************************/
 /* linear diffusion (heat equation) */
 float LinearDiff2D(float *Input, float *Output, float lambdaPar, float tau, long dimX, long dimY)
 {
-	long i,j,i1,i2,j1,j2,index;
-	float e,w,n,s,e1,w1,n1,s1;
-
+    long i,j,i1,i2,j1,j2,index;
+    float e,w,n,s,e1,w1,n1,s1;
+    
 #pragma omp parallel for shared(Input) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1)
-    for(i=0; i<dimX; i++) {
+    for(j=0; j<dimY; j++) {
         /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        for(j=0; j<dimY; j++) {
+        j1 = j+1; if (j1 == dimY) j1 = j-1;
+        j2 = j-1; if (j2 < 0) j2 = j+1;
+        for(i=0; i<dimX; i++) {
             /* symmetric boundary conditions (Neuman) */
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
+            i1 = i+1; if (i1 == dimX) i1 = i-1;
+            i2 = i-1; if (i2 < 0) i2 = i+1;
             index = j*dimX+i;
-
-                e = Output[j*dimX+i1];
-                w = Output[j*dimX+i2];
-                n = Output[j1*dimX+i];
-                s = Output[j2*dimX+i];
-
-                e1 = e - Output[index];
-                w1 = w - Output[index];
-                n1 = n - Output[index];
-                s1 = s - Output[index];
-
-                Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));
-		}}
-	return *Output;
+            
+            e = Output[j*dimX+i1];
+            w = Output[j*dimX+i2];
+            n = Output[j1*dimX+i];
+            s = Output[j2*dimX+i];
+            
+            e1 = e - Output[index];
+            w1 = w - Output[index];
+            n1 = n - Output[index];
+            s1 = s - Output[index];
+            
+            Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));
+        }}
+    return *Output;
 }
 
 /* nonlinear diffusion */
 float NonLinearDiff2D(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY)
 {
-	long i,j,i1,i2,j1,j2,index;
-	float e,w,n,s,e1,w1,n1,s1;
-
+    long i,j,i1,i2,j1,j2,index;
+    float e,w,n,s,e1,w1,n1,s1;
+    
 #pragma omp parallel for shared(Input) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1)
-    for(i=0; i<dimX; i++) {
+    for(j=0; j<dimY; j++) {
         /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        for(j=0; j<dimY; j++) {
+        j1 = j+1; if (j1 == dimY) j1 = j-1;
+        j2 = j-1; if (j2 < 0) j2 = j+1;
+        for(i=0; i<dimX; i++) {
             /* symmetric boundary conditions (Neuman) */
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
+            i1 = i+1; if (i1 == dimX) i1 = i-1;
+            i2 = i-1; if (i2 < 0) i2 = i+1;
             index = j*dimX+i;
-
-                e = Output[j*dimX+i1];
-                w = Output[j*dimX+i2];
-                n = Output[j1*dimX+i];
-                s = Output[j2*dimX+i];
-
-                e1 = e - Output[index];
-                w1 = w - Output[index];
-                n1 = n - Output[index];
-                s1 = s - Output[index];
-
+            
+            e = Output[j*dimX+i1];
+            w = Output[j*dimX+i2];
+            n = Output[j1*dimX+i];
+            s = Output[j2*dimX+i];
+            
+            e1 = e - Output[index];
+            w1 = w - Output[index];
+            n1 = n - Output[index];
+            s1 = s - Output[index];
+            
             if (penaltytype == 1){
-            /* Huber penalty */
-            if (fabs(e1) > sigmaPar) e1 =  signNDFc(e1);
-            else e1 = e1/sigmaPar;
-
-            if (fabs(w1) > sigmaPar) w1 =  signNDFc(w1);
-            else w1 = w1/sigmaPar;
-
-            if (fabs(n1) > sigmaPar) n1 =  signNDFc(n1);
-            else n1 = n1/sigmaPar;
-
-            if (fabs(s1) > sigmaPar) s1 =  signNDFc(s1);
-            else s1 = s1/sigmaPar;
+                /* Huber penalty */
+                if (fabs(e1) > sigmaPar) e1 =  signNDFc(e1);
+                else e1 = e1/sigmaPar;
+                
+                if (fabs(w1) > sigmaPar) w1 =  signNDFc(w1);
+                else w1 = w1/sigmaPar;
+                
+                if (fabs(n1) > sigmaPar) n1 =  signNDFc(n1);
+                else n1 = n1/sigmaPar;
+                
+                if (fabs(s1) > sigmaPar) s1 =  signNDFc(s1);
+                else s1 = s1/sigmaPar;
             }
             else if (penaltytype == 2) {
-            /* Perona-Malik */
-            e1 = (e1)/(1.0f + powf((e1/sigmaPar),2));
-            w1 = (w1)/(1.0f + powf((w1/sigmaPar),2));
-            n1 = (n1)/(1.0f + powf((n1/sigmaPar),2));
-            s1 = (s1)/(1.0f + powf((s1/sigmaPar),2));
+                /* Perona-Malik */
+                e1 = (e1)/(1.0f + powf((e1/sigmaPar),2));
+                w1 = (w1)/(1.0f + powf((w1/sigmaPar),2));
+                n1 = (n1)/(1.0f + powf((n1/sigmaPar),2));
+                s1 = (s1)/(1.0f + powf((s1/sigmaPar),2));
             }
             else if (penaltytype == 3) {
-            /* Tukey Biweight */
-            if (fabs(e1) <= sigmaPar) e1 =  e1*powf((1.0f - powf((e1/sigmaPar),2)), 2);
-            else e1 = 0.0f;
-            if (fabs(w1) <= sigmaPar) w1 =  w1*powf((1.0f - powf((w1/sigmaPar),2)), 2);
-            else w1 = 0.0f;
-            if (fabs(n1) <= sigmaPar) n1 =  n1*powf((1.0f - powf((n1/sigmaPar),2)), 2);
-            else n1 = 0.0f;
-            if (fabs(s1) <= sigmaPar) s1 =  s1*powf((1.0f - powf((s1/sigmaPar),2)), 2);
-            else s1 = 0.0f;
+                /* Tukey Biweight */
+                if (fabs(e1) <= sigmaPar) e1 =  e1*powf((1.0f - powf((e1/sigmaPar),2)), 2);
+                else e1 = 0.0f;
+                if (fabs(w1) <= sigmaPar) w1 =  w1*powf((1.0f - powf((w1/sigmaPar),2)), 2);
+                else w1 = 0.0f;
+                if (fabs(n1) <= sigmaPar) n1 =  n1*powf((1.0f - powf((n1/sigmaPar),2)), 2);
+                else n1 = 0.0f;
+                if (fabs(s1) <= sigmaPar) s1 =  s1*powf((1.0f - powf((s1/sigmaPar),2)), 2);
+                else s1 = 0.0f;
             }
             else {
-				printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
-				break;
-				}
-           Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));
-		}}
-	return *Output;
+                printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+                break;
+            }
+            Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));
+        }}
+    return *Output;
 }
 /********************************************************************/
 /***************************3D Functions*****************************/
@@ -210,125 +209,125 @@ float NonLinearDiff2D(float *Input, float *Output, float lambdaPar, float sigmaP
 /* linear diffusion (heat equation) */
 float LinearDiff3D(float *Input, float *Output, float lambdaPar, float tau, long dimX, long dimY, long dimZ)
 {
-	long i,j,k,i1,i2,j1,j2,k1,k2,index;
-	float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
-
+    long i,j,k,i1,i2,j1,j2,k1,k2,index;
+    float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
+    
 #pragma omp parallel for shared(Input) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1,k,k1,k2,u1,d1,u,d)
-for(k=0; k<dimZ; k++) {
-	k1 = k+1; if (k1 == dimZ) k1 = k-1;
-    k2 = k-1; if (k2 < 0) k2 = k+1;
-    for(i=0; i<dimX; i++) {
-        /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
+    for(k=0; k<dimZ; k++) {
+        k1 = k+1; if (k1 == dimZ) k1 = k-1;
+        k2 = k-1; if (k2 < 0) k2 = k+1;
         for(j=0; j<dimY; j++) {
             /* symmetric boundary conditions (Neuman) */
             j1 = j+1; if (j1 == dimY) j1 = j-1;
             j2 = j-1; if (j2 < 0) j2 = j+1;
-            index = (dimX*dimY)*k + j*dimX+i;
-
+            for(i=0; i<dimX; i++) {
+                /* symmetric boundary conditions (Neuman) */
+                i1 = i+1; if (i1 == dimX) i1 = i-1;
+                i2 = i-1; if (i2 < 0) i2 = i+1;
+                index = (dimX*dimY)*k + j*dimX+i;
+                
                 e = Output[(dimX*dimY)*k + j*dimX+i1];
                 w = Output[(dimX*dimY)*k + j*dimX+i2];
                 n = Output[(dimX*dimY)*k + j1*dimX+i];
                 s = Output[(dimX*dimY)*k + j2*dimX+i];
                 u = Output[(dimX*dimY)*k1 + j*dimX+i];
                 d = Output[(dimX*dimY)*k2 + j*dimX+i];
-
+                
                 e1 = e - Output[index];
                 w1 = w - Output[index];
                 n1 = n - Output[index];
                 s1 = s - Output[index];
                 u1 = u - Output[index];
                 d1 = d - Output[index];
-
+                
                 Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index]));
-		}}}
-	return *Output;
+            }}}
+    return *Output;
 }
 
 float NonLinearDiff3D(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY, long dimZ)
 {
-	long i,j,k,i1,i2,j1,j2,k1,k2,index;
-	float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
-
+    long i,j,k,i1,i2,j1,j2,k1,k2,index;
+    float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
+    
 #pragma omp parallel for shared(Input) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1,k,k1,k2,u1,d1,u,d)
-for(k=0; k<dimZ; k++) {
-	k1 = k+1; if (k1 == dimZ) k1 = k-1;
-    k2 = k-1; if (k2 < 0) k2 = k+1;
-    for(i=0; i<dimX; i++) {
-        /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
+    for(k=0; k<dimZ; k++) {
+        k1 = k+1; if (k1 == dimZ) k1 = k-1;
+        k2 = k-1; if (k2 < 0) k2 = k+1;
         for(j=0; j<dimY; j++) {
             /* symmetric boundary conditions (Neuman) */
             j1 = j+1; if (j1 == dimY) j1 = j-1;
             j2 = j-1; if (j2 < 0) j2 = j+1;
-            index = (dimX*dimY)*k + j*dimX+i;
-
+            for(i=0; i<dimX; i++) {
+                /* symmetric boundary conditions (Neuman) */
+                i1 = i+1; if (i1 == dimX) i1 = i-1;
+                i2 = i-1; if (i2 < 0) i2 = i+1;
+                index = (dimX*dimY)*k + j*dimX+i;
+                
                 e = Output[(dimX*dimY)*k + j*dimX+i1];
                 w = Output[(dimX*dimY)*k + j*dimX+i2];
                 n = Output[(dimX*dimY)*k + j1*dimX+i];
                 s = Output[(dimX*dimY)*k + j2*dimX+i];
                 u = Output[(dimX*dimY)*k1 + j*dimX+i];
                 d = Output[(dimX*dimY)*k2 + j*dimX+i];
-
+                
                 e1 = e - Output[index];
                 w1 = w - Output[index];
                 n1 = n - Output[index];
                 s1 = s - Output[index];
                 u1 = u - Output[index];
                 d1 = d - Output[index];
-
-             if (penaltytype == 1){
-            /* Huber penalty */
-            if (fabs(e1) > sigmaPar) e1 =  signNDFc(e1);
-            else e1 = e1/sigmaPar;
-
-            if (fabs(w1) > sigmaPar) w1 =  signNDFc(w1);
-            else w1 = w1/sigmaPar;
-
-            if (fabs(n1) > sigmaPar) n1 =  signNDFc(n1);
-            else n1 = n1/sigmaPar;
-
-            if (fabs(s1) > sigmaPar) s1 =  signNDFc(s1);
-            else s1 = s1/sigmaPar;
-
-            if (fabs(u1) > sigmaPar) u1 =  signNDFc(u1);
-            else u1 = u1/sigmaPar;
-
-            if (fabs(d1) > sigmaPar) d1 =  signNDFc(d1);
-            else d1 = d1/sigmaPar;
-            }
-            else if (penaltytype == 2) {
-            /* Perona-Malik */
-            e1 = (e1)/(1.0f + powf((e1/sigmaPar),2));
-            w1 = (w1)/(1.0f + powf((w1/sigmaPar),2));
-            n1 = (n1)/(1.0f + powf((n1/sigmaPar),2));
-            s1 = (s1)/(1.0f + powf((s1/sigmaPar),2));
-            u1 = (u1)/(1.0f + powf((u1/sigmaPar),2));
-            d1 = (d1)/(1.0f + powf((d1/sigmaPar),2));
-            }
-            else if (penaltytype == 3) {
-            /* Tukey Biweight */
-            if (fabs(e1) <= sigmaPar) e1 =  e1*powf((1.0f - powf((e1/sigmaPar),2)), 2);
-            else e1 = 0.0f;
-            if (fabs(w1) <= sigmaPar) w1 =  w1*powf((1.0f - powf((w1/sigmaPar),2)), 2);
-            else w1 = 0.0f;
-            if (fabs(n1) <= sigmaPar) n1 =  n1*powf((1.0f - powf((n1/sigmaPar),2)), 2);
-            else n1 = 0.0f;
-            if (fabs(s1) <= sigmaPar) s1 =  s1*powf((1.0f - powf((s1/sigmaPar),2)), 2);
-            else s1 = 0.0f;
-            if (fabs(u1) <= sigmaPar) u1 =  u1*powf((1.0f - powf((u1/sigmaPar),2)), 2);
-            else u1 = 0.0f;
-            if (fabs(d1) <= sigmaPar) d1 =  d1*powf((1.0f - powf((d1/sigmaPar),2)), 2);
-            else d1 = 0.0f;
-            }
-            else {
-				printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
-				break;
-				}
-
+                
+                if (penaltytype == 1){
+                    /* Huber penalty */
+                    if (fabs(e1) > sigmaPar) e1 =  signNDFc(e1);
+                    else e1 = e1/sigmaPar;
+                    
+                    if (fabs(w1) > sigmaPar) w1 =  signNDFc(w1);
+                    else w1 = w1/sigmaPar;
+                    
+                    if (fabs(n1) > sigmaPar) n1 =  signNDFc(n1);
+                    else n1 = n1/sigmaPar;
+                    
+                    if (fabs(s1) > sigmaPar) s1 =  signNDFc(s1);
+                    else s1 = s1/sigmaPar;
+                    
+                    if (fabs(u1) > sigmaPar) u1 =  signNDFc(u1);
+                    else u1 = u1/sigmaPar;
+                    
+                    if (fabs(d1) > sigmaPar) d1 =  signNDFc(d1);
+                    else d1 = d1/sigmaPar;
+                }
+                else if (penaltytype == 2) {
+                    /* Perona-Malik */
+                    e1 = (e1)/(1.0f + powf((e1/sigmaPar),2));
+                    w1 = (w1)/(1.0f + powf((w1/sigmaPar),2));
+                    n1 = (n1)/(1.0f + powf((n1/sigmaPar),2));
+                    s1 = (s1)/(1.0f + powf((s1/sigmaPar),2));
+                    u1 = (u1)/(1.0f + powf((u1/sigmaPar),2));
+                    d1 = (d1)/(1.0f + powf((d1/sigmaPar),2));
+                }
+                else if (penaltytype == 3) {
+                    /* Tukey Biweight */
+                    if (fabs(e1) <= sigmaPar) e1 =  e1*powf((1.0f - powf((e1/sigmaPar),2)), 2);
+                    else e1 = 0.0f;
+                    if (fabs(w1) <= sigmaPar) w1 =  w1*powf((1.0f - powf((w1/sigmaPar),2)), 2);
+                    else w1 = 0.0f;
+                    if (fabs(n1) <= sigmaPar) n1 =  n1*powf((1.0f - powf((n1/sigmaPar),2)), 2);
+                    else n1 = 0.0f;
+                    if (fabs(s1) <= sigmaPar) s1 =  s1*powf((1.0f - powf((s1/sigmaPar),2)), 2);
+                    else s1 = 0.0f;
+                    if (fabs(u1) <= sigmaPar) u1 =  u1*powf((1.0f - powf((u1/sigmaPar),2)), 2);
+                    else u1 = 0.0f;
+                    if (fabs(d1) <= sigmaPar) d1 =  d1*powf((1.0f - powf((d1/sigmaPar),2)), 2);
+                    else d1 = 0.0f;
+                }
+                else {
+                    printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+                    break;
+                }
+                
                 Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index]));
-		}}}
-	return *Output;
+            }}}
+    return *Output;
 }
diff --git a/src/Core/regularisers_CPU/FGP_TV_core.c b/src/Core/regularisers_CPU/FGP_TV_core.c
index 69c92bc..a16a2e5 100644
--- a/src/Core/regularisers_CPU/FGP_TV_core.c
+++ b/src/Core/regularisers_CPU/FGP_TV_core.c
@@ -1,21 +1,21 @@
 /*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2019 Daniil Kazantsev
-Copyright 2019 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2019 Daniil Kazantsev
+ * Copyright 2019 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #include "FGP_TV_core.h"
 
@@ -46,66 +46,66 @@ float TV_FGP_CPU_main(float *Input, float *Output, float *infovector, float lamb
     float tk = 1.0f;
     float tkp1 =1.0f;
     int count = 0;
-
-	if (dimZ <= 1) {
-	/*2D case */
-  	float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL;
-	DimTotal = (long)(dimX*dimY);
-
-	      if (epsil != 0.0f) Output_prev = calloc(DimTotal, sizeof(float));
+    
+    if (dimZ <= 1) {
+        /*2D case */
+        float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL;
+        DimTotal = (long)(dimX*dimY);
+        
+        if (epsil != 0.0f) Output_prev = calloc(DimTotal, sizeof(float));
         P1 = calloc(DimTotal, sizeof(float));
         P2 = calloc(DimTotal, sizeof(float));
         P1_prev = calloc(DimTotal, sizeof(float));
         P2_prev = calloc(DimTotal, sizeof(float));
         R1 = calloc(DimTotal, sizeof(float));
         R2 = calloc(DimTotal, sizeof(float));
-
-	/* begin iterations */
+        
+        /* begin iterations */
         for(ll=0; ll<iterationsNumb; ll++) {
-
+            
             if ((epsil != 0.0f)  && (ll % 5 == 0)) copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
             /* computing the gradient of the objective function */
             Obj_func2D(Input, Output, R1, R2, lambdaPar, (long)(dimX), (long)(dimY));
-
+            
             /* apply nonnegativity */
             if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
-
+            
             /*Taking a step towards minus of the gradient*/
             Grad_func2D(P1, P2, Output, R1, R2, lambdaPar, (long)(dimX), (long)(dimY));
-
+            
             /* projection step */
             Proj_func2D(P1, P2, methodTV, DimTotal);
-
+            
             /*updating R and t*/
             tkp1 = (1.0f + sqrtf(1.0f + 4.0f*tk*tk))*0.5f;
             Rupd_func2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal);
-
+            
             /*storing old values*/
             copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), 1l);
             copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), 1l);
             tk = tkp1;
-
+            
             /* check early stopping criteria */
             if ((epsil != 0.0f)  && (ll % 5 == 0)) {
-            re = 0.0f; re1 = 0.0f;
-	            for(j=0; j<DimTotal; j++)
-        	    {
-        	        re += powf(Output[j] - Output_prev[j],2);
-        	        re1 += powf(Output[j],2);
-        	    }
-           re = sqrtf(re)/sqrtf(re1);
-           if (re < epsil)  count++;
-           if (count > 3) break;
+                re = 0.0f; re1 = 0.0f;
+                for(j=0; j<DimTotal; j++)
+                {
+                    re += powf(Output[j] - Output_prev[j],2);
+                    re1 += powf(Output[j],2);
+                }
+                re = sqrtf(re)/sqrtf(re1);
+                if (re < epsil)  count++;
+                if (count > 3) break;
             }
-     }
-	if (epsil != 0.0f) free(Output_prev);
-	free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2);
-	}
-	else {
-		/*3D case*/
-	float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL;
+        }
+        if (epsil != 0.0f) free(Output_prev);
+        free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2);
+    }
+    else {
+        /*3D case*/
+        float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL;
         DimTotal = (long)(dimX*dimY*dimZ);
-
+        
         if (epsil != 0.0f) Output_prev = calloc(DimTotal, sizeof(float));
         P1 = calloc(DimTotal, sizeof(float));
         P2 = calloc(DimTotal, sizeof(float));
@@ -116,58 +116,58 @@ float TV_FGP_CPU_main(float *Input, float *Output, float *infovector, float lamb
         R1 = calloc(DimTotal, sizeof(float));
         R2 = calloc(DimTotal, sizeof(float));
         R3 = calloc(DimTotal, sizeof(float));
-
-		    /* begin iterations */
+        
+        /* begin iterations */
         for(ll=0; ll<iterationsNumb; ll++) {
-
+            
             if ((epsil != 0.0f)  && (ll % 5 == 0)) copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-
+            
             /* computing the gradient of the objective function */
             Obj_func3D(Input, Output, R1, R2, R3, lambdaPar, (long)(dimX), (long)(dimY), (long)(dimZ));
-
+            
             /* apply nonnegativity */
             if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
-
+            
             /*Taking a step towards minus of the gradient*/
             Grad_func3D(P1, P2, P3, Output, R1, R2, R3, lambdaPar, (long)(dimX), (long)(dimY), (long)(dimZ));
-
+            
             /* projection step */
             Proj_func3D(P1, P2, P3, methodTV, DimTotal);
-
+            
             /*updating R and t*/
             tkp1 = (1.0f + sqrtf(1.0f + 4.0f*tk*tk))*0.5f;
             Rupd_func3D(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, DimTotal);
-
+            
             /* calculate norm - stopping rules*/
             if ((epsil != 0.0f)  && (ll % 5 == 0)) {
-            re = 0.0f; re1 = 0.0f;
-	            for(j=0; j<DimTotal; j++)
-        	    {
-        	        re += powf(Output[j] - Output_prev[j],2);
-        	        re1 += powf(Output[j],2);
-        	    }
-            re = sqrtf(re)/sqrtf(re1);
-            /* stop if the norm residual is less than the tolerance EPS */
-            if (re < epsil)  count++;
-            if (count > 3) break;
+                re = 0.0f; re1 = 0.0f;
+                for(j=0; j<DimTotal; j++)
+                {
+                    re += powf(Output[j] - Output_prev[j],2);
+                    re1 += powf(Output[j],2);
+                }
+                re = sqrtf(re)/sqrtf(re1);
+                /* stop if the norm residual is less than the tolerance EPS */
+                if (re < epsil)  count++;
+                if (count > 3) break;
             }
-
+            
             /*storing old values*/
             copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
             copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
             copyIm(P3, P3_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
             tk = tkp1;
         }
-
-	if (epsil != 0.0f) free(Output_prev);
-	free(P1); free(P2); free(P3); free(P1_prev); free(P2_prev); free(P3_prev); free(R1); free(R2); free(R3);
-	}
-
-       /*adding info into info_vector */
-        infovector[0] = (float)(ll);  /*iterations number (if stopped earlier based on tolerance)*/
-        infovector[1] = re;  /* reached tolerance */
-
-	return 0;
+        
+        if (epsil != 0.0f) free(Output_prev);
+        free(P1); free(P2); free(P3); free(P1_prev); free(P2_prev); free(P3_prev); free(R1); free(R2); free(R3);
+    }
+    
+    /*adding info into info_vector */
+    infovector[0] = (float)(ll);  /*iterations number (if stopped earlier based on tolerance)*/
+    infovector[1] = re;  /* reached tolerance */
+    
+    return 0;
 }
 
 float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, long dimX, long dimY)
@@ -177,7 +177,7 @@ float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, long di
 #pragma omp parallel for shared(A,D,R1,R2) private(index,i,j,val1,val2)
     for(j=0; j<dimY; j++) {
         for(i=0; i<dimX; i++) {
-			index = j*dimX+i;
+            index = j*dimX+i;
             /* boundary conditions  */
             if (i == 0) {val1 = 0.0f;} else {val1 = R1[j*dimX + (i-1)];}
             if (j == 0) {val2 = 0.0f;} else {val2 = R2[(j-1)*dimX + i];}
@@ -193,7 +193,7 @@ float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float la
 #pragma omp parallel for shared(P1,P2,D,R1,R2,multip) private(index,i,j,val1,val2)
     for(j=0; j<dimY; j++) {
         for(i=0; i<dimX; i++) {
-			index = j*dimX+i;
+            index = j*dimX+i;
             /* boundary conditions */
             if (i == dimX-1) val1 = 0.0f; else val1 = D[index] - D[j*dimX + (i+1)];
             if (j == dimY-1) val2 = 0.0f; else val2 = D[index] - D[(j+1)*dimX + i];
@@ -210,25 +210,25 @@ float Proj_func2D(float *P1, float *P2, int methTV, long DimTotal)
         /* isotropic TV*/
 #pragma omp parallel for shared(P1,P2) private(i,denom,sq_denom)
         for(i=0; i<DimTotal; i++) {
-                denom = powf(P1[i],2) +  powf(P2[i],2);
-                if (denom > 1.0f) {
-					sq_denom = 1.0f/sqrtf(denom);
-                    P1[i] = P1[i]*sq_denom;
-                    P2[i] = P2[i]*sq_denom;
-                }
+            denom = powf(P1[i],2) +  powf(P2[i],2);
+            if (denom > 1.0f) {
+                sq_denom = 1.0f/sqrtf(denom);
+                P1[i] = P1[i]*sq_denom;
+                P2[i] = P2[i]*sq_denom;
             }
+        }
     }
     else {
         /* anisotropic TV*/
 #pragma omp parallel for shared(P1,P2) private(i,val1,val2)
         for(i=0; i<DimTotal; i++) {
-                val1 = fabs(P1[i]);
-                val2 = fabs(P2[i]);
-                if (val1 < 1.0f) {val1 = 1.0f;}
-                if (val2 < 1.0f) {val2 = 1.0f;}
-                P1[i] = P1[i]/val1;
-                P2[i] = P2[i]/val2;
-            }
+            val1 = fabs(P1[i]);
+            val2 = fabs(P2[i]);
+            if (val1 < 1.0f) {val1 = 1.0f;}
+            if (val2 < 1.0f) {val2 = 1.0f;}
+            P1[i] = P1[i]/val1;
+            P2[i] = P2[i]/val2;
+        }
     }
     return 1;
 }
@@ -239,9 +239,9 @@ float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1,
     multip = ((tk-1.0f)/tkp1);
 #pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,multip) private(i)
     for(i=0; i<DimTotal; i++) {
-            R1[i] = P1[i] + multip*(P1[i] - P1_old[i]);
-            R2[i] = P2[i] + multip*(P2[i] - P2_old[i]);
-        }
+        R1[i] = P1[i] + multip*(P1[i] - P1_old[i]);
+        R2[i] = P2[i] + multip*(P2[i] - P2_old[i]);
+    }
     return 1;
 }
 
@@ -255,7 +255,7 @@ float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lamb
     for(k=0; k<dimZ; k++) {
         for(j=0; j<dimY; j++) {
             for(i=0; i<dimX; i++) {
-		index = (dimX*dimY)*k + j*dimX+i;
+                index = (dimX*dimY)*k + j*dimX+i;
                 /* boundary conditions */
                 if (i == 0) {val1 = 0.0f;} else {val1 = R1[(dimX*dimY)*k + j*dimX + (i-1)];}
                 if (j == 0) {val2 = 0.0f;} else {val2 = R2[(dimX*dimY)*k + (j-1)*dimX + i];}
@@ -273,7 +273,7 @@ float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R
     for(k=0; k<dimZ; k++) {
         for(j=0; j<dimY; j++) {
             for(i=0; i<dimX; i++) {
-		index = (dimX*dimY)*k + j*dimX+i;
+                index = (dimX*dimY)*k + j*dimX+i;
                 /* boundary conditions */
                 if (i == dimX-1) val1 = 0.0f; else val1 = D[index] - D[(dimX*dimY)*k + j*dimX + (i+1)];
                 if (j == dimY-1) val2 = 0.0f; else val2 = D[index] - D[(dimX*dimY)*k + (j+1)*dimX + i];
@@ -289,33 +289,33 @@ float Proj_func3D(float *P1, float *P2, float *P3, int methTV, long DimTotal)
     float val1, val2, val3, denom, sq_denom;
     long i;
     if (methTV == 0) {
-	/* isotropic TV*/
-	#pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3,sq_denom)
-    for(i=0; i<DimTotal; i++) {
-				denom = powf(P1[i],2) + powf(P2[i],2) + powf(P3[i],2);
-                if (denom > 1.0f) {
-					sq_denom = 1.0f/sqrtf(denom);
-                    P1[i] = P1[i]*sq_denom;
-                    P2[i] = P2[i]*sq_denom;
-                    P3[i] = P3[i]*sq_denom;
-                }
-			}
-	}
+        /* isotropic TV*/
+#pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3,sq_denom)
+        for(i=0; i<DimTotal; i++) {
+            denom = powf(P1[i],2) + powf(P2[i],2) + powf(P3[i],2);
+            if (denom > 1.0f) {
+                sq_denom = 1.0f/sqrtf(denom);
+                P1[i] = P1[i]*sq_denom;
+                P2[i] = P2[i]*sq_denom;
+                P3[i] = P3[i]*sq_denom;
+            }
+        }
+    }
     else {
-    /* anisotropic TV*/
+        /* anisotropic TV*/
 #pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3)
-    for(i=0; i<DimTotal; i++) {
-                val1 = fabs(P1[i]);
-                val2 = fabs(P2[i]);
-                val3 = fabs(P3[i]);
-                if (val1 < 1.0f) {val1 = 1.0f;}
-                if (val2 < 1.0f) {val2 = 1.0f;}
-                if (val3 < 1.0f) {val3 = 1.0f;}
-                P1[i] = P1[i]/val1;
-                P2[i] = P2[i]/val2;
-                P3[i] = P3[i]/val3;
-            }
-		}
+        for(i=0; i<DimTotal; i++) {
+            val1 = fabs(P1[i]);
+            val2 = fabs(P2[i]);
+            val3 = fabs(P3[i]);
+            if (val1 < 1.0f) {val1 = 1.0f;}
+            if (val2 < 1.0f) {val2 = 1.0f;}
+            if (val3 < 1.0f) {val3 = 1.0f;}
+            P1[i] = P1[i]/val1;
+            P2[i] = P2[i]/val2;
+            P3[i] = P3[i]/val3;
+        }
+    }
     return 1;
 }
 float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, long DimTotal)
@@ -325,9 +325,9 @@ float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3,
     multip = ((tk-1.0f)/tkp1);
 #pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3,multip) private(i)
     for(i=0; i<DimTotal; i++) {
-                R1[i] = P1[i] + multip*(P1[i] - P1_old[i]);
-                R2[i] = P2[i] + multip*(P2[i] - P2_old[i]);
-                R3[i] = P3[i] + multip*(P3[i] - P3_old[i]);
-            }
+        R1[i] = P1[i] + multip*(P1[i] - P1_old[i]);
+        R2[i] = P2[i] + multip*(P2[i] - P2_old[i]);
+        R3[i] = P3[i] + multip*(P3[i] - P3_old[i]);
+    }
     return 1;
 }
diff --git a/src/Core/regularisers_CPU/FGP_dTV_core.c b/src/Core/regularisers_CPU/FGP_dTV_core.c
index 4e1e38c..afd7264 100644
--- a/src/Core/regularisers_CPU/FGP_dTV_core.c
+++ b/src/Core/regularisers_CPU/FGP_dTV_core.c
@@ -1,21 +1,21 @@
 /*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2019 Daniil Kazantsev
-Copyright 2019 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2019 Daniil Kazantsev
+ * Copyright 2019 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #include "FGP_dTV_core.h"
 
@@ -45,18 +45,18 @@ limitations under the License.
 
 float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float *infovector, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int dimX, int dimY, int dimZ)
 {
-	  int ll;
+    int ll;
     long j, DimTotal;
-	  float re, re1;
+    float re, re1;
     re = 0.0f; re1 = 0.0f;
-	  float tk = 1.0f;
+    float tk = 1.0f;
     float tkp1=1.0f;
     int count = 0;
-
-
+    
+    
     float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL, *InputRef_x=NULL, *InputRef_y=NULL;
     DimTotal = (long)(dimX*dimY*dimZ);
-
+    
     if (epsil != 0.0f) Output_prev = calloc(DimTotal, sizeof(float));
     P1 = calloc(DimTotal, sizeof(float));
     P2 = calloc(DimTotal, sizeof(float));
@@ -66,119 +66,119 @@ float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float *info
     R2 = calloc(DimTotal, sizeof(float));
     InputRef_x = calloc(DimTotal, sizeof(float));
     InputRef_y = calloc(DimTotal, sizeof(float));
-
-	if (dimZ <= 1) {
-    		/*2D case */
+    
+    if (dimZ <= 1) {
+        /*2D case */
         /* calculate gradient field (smoothed) for the reference image */
-		GradNorm_func2D(InputRef, InputRef_x, InputRef_y, eta, (long)(dimX), (long)(dimY));
-
-		/* begin iterations */
+        GradNorm_func2D(InputRef, InputRef_x, InputRef_y, eta, (long)(dimX), (long)(dimY));
+        
+        /* begin iterations */
         for(ll=0; ll<iterationsNumb; ll++) {
-
+            
             if ((epsil != 0.0f)  && (ll % 5 == 0)) copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
             /*projects a 2D vector field R-1,2 onto the orthogonal complement of another 2D vector field InputRef_xy*/
             ProjectVect_func2D(R1, R2, InputRef_x, InputRef_y, (long)(dimX), (long)(dimY));
-
+            
             /* computing the gradient of the objective function */
             Obj_dfunc2D(Input, Output, R1, R2, lambdaPar, (long)(dimX), (long)(dimY));
-
+            
             /* apply nonnegativity */
             if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
-
+            
             /*Taking a step towards minus of the gradient*/
             Grad_dfunc2D(P1, P2, Output, R1, R2, InputRef_x, InputRef_y, lambdaPar, (long)(dimX), (long)(dimY));
-
+            
             /* projection step */
             Proj_dfunc2D(P1, P2, methodTV, DimTotal);
-
+            
             /*updating R and t*/
             tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
             Rupd_dfunc2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal);
-
+            
             copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), 1l);
             copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), 1l);
             tk = tkp1;
-
+            
             /* check early stopping criteria */
             if ((epsil != 0.0f)  && (ll % 5 == 0)) {
-            re = 0.0f; re1 = 0.0f;
-              for(j=0; j<DimTotal; j++)
-              {
-                  re += powf(Output[j] - Output_prev[j],2);
-                  re1 += powf(Output[j],2);
-              }
-           re = sqrtf(re)/sqrtf(re1);
-           if (re < epsil)  count++;
-           if (count > 3) break;
+                re = 0.0f; re1 = 0.0f;
+                for(j=0; j<DimTotal; j++)
+                {
+                    re += powf(Output[j] - Output_prev[j],2);
+                    re1 += powf(Output[j],2);
+                }
+                re = sqrtf(re)/sqrtf(re1);
+                if (re < epsil)  count++;
+                if (count > 3) break;
             }
         }
-	}
-	else {
-		/*3D case*/
-		float *P3=NULL, *P3_prev=NULL, *R3=NULL, *InputRef_z=NULL;
-
+    }
+    else {
+        /*3D case*/
+        float *P3=NULL, *P3_prev=NULL, *R3=NULL, *InputRef_z=NULL;
+        
         P3 = calloc(DimTotal, sizeof(float));
         P3_prev = calloc(DimTotal, sizeof(float));
         R3 = calloc(DimTotal, sizeof(float));
         InputRef_z = calloc(DimTotal, sizeof(float));
-
-		/* calculate gradient field (smoothed) for the reference volume */
-		GradNorm_func3D(InputRef, InputRef_x, InputRef_y, InputRef_z, eta, (long)(dimX), (long)(dimY), (long)(dimZ));
-
-		/* begin iterations */
+        
+        /* calculate gradient field (smoothed) for the reference volume */
+        GradNorm_func3D(InputRef, InputRef_x, InputRef_y, InputRef_z, eta, (long)(dimX), (long)(dimY), (long)(dimZ));
+        
+        /* begin iterations */
         for(ll=0; ll<iterationsNumb; ll++) {
-
+            
             if ((epsil != 0.0f)  && (ll % 5 == 0)) copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-
-					 /*projects a 3D vector field R-1,2,3 onto the orthogonal complement of another 3D vector field InputRef_xyz*/
+            
+            /*projects a 3D vector field R-1,2,3 onto the orthogonal complement of another 3D vector field InputRef_xyz*/
             ProjectVect_func3D(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, (long)(dimX), (long)(dimY), (long)(dimZ));
-
+            
             /* computing the gradient of the objective function */
             Obj_dfunc3D(Input, Output, R1, R2, R3, lambdaPar, (long)(dimX), (long)(dimY), (long)(dimZ));
-
+            
             /* apply nonnegativity */
             if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
-
+            
             /*Taking a step towards minus of the gradient*/
             Grad_dfunc3D(P1, P2, P3, Output, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, lambdaPar, (long)(dimX), (long)(dimY), (long)(dimZ));
-
+            
             /* projection step */
             Proj_dfunc3D(P1, P2, P3, methodTV, DimTotal);
-
+            
             /*updating R and t*/
             tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
             Rupd_dfunc3D(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, DimTotal);
-
-            /*storing old values*/            
+            
+            /*storing old values*/
             copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
             copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
             copyIm(P3, P3_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
             tk = tkp1;
-
+            
             /* check early stopping criteria */
             if ((epsil != 0.0f)  && (ll % 5 == 0)) {
-            re = 0.0f; re1 = 0.0f;
-              for(j=0; j<DimTotal; j++)
-              {
-                  re += powf(Output[j] - Output_prev[j],2);
-                  re1 += powf(Output[j],2);
-              }
-           re = sqrtf(re)/sqrtf(re1);
-           if (re < epsil)  count++;
-           if (count > 3) break;
+                re = 0.0f; re1 = 0.0f;
+                for(j=0; j<DimTotal; j++)
+                {
+                    re += powf(Output[j] - Output_prev[j],2);
+                    re1 += powf(Output[j],2);
+                }
+                re = sqrtf(re)/sqrtf(re1);
+                if (re < epsil)  count++;
+                if (count > 3) break;
             }
         }
-
-		free(P3); free(P3_prev); free(R3); free(InputRef_z);
-	}
-  if (epsil != 0.0f) free(Output_prev);
-  free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2); free(InputRef_x); free(InputRef_y);
-
-   /*adding info into info_vector */
-   infovector[0] = (float)(ll);  /*iterations number (if stopped earlier based on tolerance)*/
-   infovector[1] = re;  /* reached tolerance */
-
-  return 0;
+        
+        free(P3); free(P3_prev); free(R3); free(InputRef_z);
+    }
+    if (epsil != 0.0f) free(Output_prev);
+    free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2); free(InputRef_x); free(InputRef_y);
+    
+    /*adding info into info_vector */
+    infovector[0] = (float)(ll);  /*iterations number (if stopped earlier based on tolerance)*/
+    infovector[1] = re;  /* reached tolerance */
+    
+    return 0;
 }
 
 
@@ -191,9 +191,9 @@ float GradNorm_func2D(float *B, float *B_x, float *B_y, float eta, long dimX, lo
     long i,j,index;
     float val1, val2, gradX, gradY, magn;
 #pragma omp parallel for shared(B, B_x, B_y) private(i,j,index,val1,val2,gradX,gradY,magn)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-			index = j*dimX+i;
+    for(j=0; j<dimY; j++) {
+        for(i=0; i<dimX; i++) {
+            index = j*dimX+i;
             /* zero boundary conditions */
             if (i == dimX-1) {val1 = 0.0f;} else {val1 = B[j*dimX + (i+1)];}
             if (j == dimY-1) {val2 = 0.0f;} else {val2 = B[(j+1)*dimX + i];}
@@ -212,9 +212,9 @@ float ProjectVect_func2D(float *R1, float *R2, float *B_x, float *B_y, long dimX
     long i,j,index;
     float in_prod;
 #pragma omp parallel for shared(R1, R2, B_x, B_y) private(index,i,j,in_prod)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-			index = j*dimX+i;
+    for(j=0; j<dimY; j++) {
+        for(i=0; i<dimX; i++) {
+            index = j*dimX+i;
             in_prod = R1[index]*B_x[index] + R2[index]*B_y[index];   /* calculate inner product */
             R1[index] = R1[index] - in_prod*B_x[index];
             R2[index] = R2[index] - in_prod*B_y[index];
@@ -227,9 +227,9 @@ float Obj_dfunc2D(float *A, float *D, float *R1, float *R2, float lambda, long d
     float val1, val2;
     long i,j,index;
 #pragma omp parallel for shared(A,D,R1,R2) private(index,i,j,val1,val2)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-			index = j*dimX+i;
+    for(j=0; j<dimY; j++) {
+        for(i=0; i<dimX; i++) {
+            index = j*dimX+i;
             /* boundary conditions  */
             if (i == 0) {val1 = 0.0f;} else {val1 = R1[j*dimX + (i-1)];}
             if (j == 0) {val2 = 0.0f;} else {val2 = R2[(j-1)*dimX + i];}
@@ -243,20 +243,20 @@ float Grad_dfunc2D(float *P1, float *P2, float *D, float *R1, float *R2, float *
     long i,j,index;
     multip = (1.0f/(8.0f*lambda));
 #pragma omp parallel for shared(P1,P2,D,R1,R2,B_x,B_y,multip) private(i,j,index,val1,val2,in_prod)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-			index = j*dimX+i;
+    for(j=0; j<dimY; j++) {
+        for(i=0; i<dimX; i++) {
+            index = j*dimX+i;
             /* boundary conditions */
             if (i == dimX-1) val1 = 0.0f; else val1 = D[index] - D[j*dimX + (i+1)];
             if (j == dimY-1) val2 = 0.0f; else val2 = D[index] - D[(j+1)*dimX + i];
-
+            
             in_prod = val1*B_x[index] + val2*B_y[index];   /* calculate inner product */
             val1 = val1 - in_prod*B_x[index];
             val2 = val2 - in_prod*B_y[index];
-
+            
             P1[index] = R1[index] + multip*val1;
             P2[index] = R2[index] + multip*val2;
-
+            
         }}
     return 1;
 }
@@ -268,25 +268,25 @@ float Proj_dfunc2D(float *P1, float *P2, int methTV, long DimTotal)
         /* isotropic TV*/
 #pragma omp parallel for shared(P1,P2) private(i,denom,sq_denom)
         for(i=0; i<DimTotal; i++) {
-                denom = powf(P1[i],2) +  powf(P2[i],2);
-                if (denom > 1.0f) {
-					sq_denom = 1.0f/sqrtf(denom);
-                    P1[i] = P1[i]*sq_denom;
-                    P2[i] = P2[i]*sq_denom;
-                }
+            denom = powf(P1[i],2) +  powf(P2[i],2);
+            if (denom > 1.0f) {
+                sq_denom = 1.0f/sqrtf(denom);
+                P1[i] = P1[i]*sq_denom;
+                P2[i] = P2[i]*sq_denom;
             }
+        }
     }
     else {
         /* anisotropic TV*/
 #pragma omp parallel for shared(P1,P2) private(i,val1,val2)
         for(i=0; i<DimTotal; i++) {
-                val1 = fabs(P1[i]);
-                val2 = fabs(P2[i]);
-                if (val1 < 1.0f) {val1 = 1.0f;}
-                if (val2 < 1.0f) {val2 = 1.0f;}
-                P1[i] = P1[i]/val1;
-                P2[i] = P2[i]/val2;
-            }
+            val1 = fabs(P1[i]);
+            val2 = fabs(P2[i]);
+            if (val1 < 1.0f) {val1 = 1.0f;}
+            if (val2 < 1.0f) {val2 = 1.0f;}
+            P1[i] = P1[i]/val1;
+            P2[i] = P2[i]/val2;
+        }
     }
     return 1;
 }
@@ -297,9 +297,9 @@ float Rupd_dfunc2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1
     multip = ((tk-1.0f)/tkp1);
 #pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,multip) private(i)
     for(i=0; i<DimTotal; i++) {
-            R1[i] = P1[i] + multip*(P1[i] - P1_old[i]);
-            R2[i] = P2[i] + multip*(P2[i] - P2_old[i]);
-        }
+        R1[i] = P1[i] + multip*(P1[i] - P1_old[i]);
+        R2[i] = P2[i] + multip*(P2[i] - P2_old[i]);
+    }
     return 1;
 }
 
@@ -311,25 +311,26 @@ float GradNorm_func3D(float *B, float *B_x, float *B_y, float *B_z, float eta, l
     long i, j, k, index;
     float val1, val2, val3, gradX, gradY, gradZ, magn;
 #pragma omp parallel for shared(B, B_x, B_y, B_z) private(i,j,k,index,val1,val2,val3,gradX,gradY,gradZ,magn)
-    for(i=0; i<dimX; i++) {
+    for(k=0; k<dimZ; k++) {
         for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-			index = (dimX*dimY)*k + j*dimX+i;
-
-            /* zero boundary conditions */
-            if (i == dimX-1) {val1 = 0.0f;} else {val1 = B[(dimX*dimY)*k + j*dimX+(i+1)];}
-            if (j == dimY-1) {val2 = 0.0f;} else {val2 = B[(dimX*dimY)*k + (j+1)*dimX+i];}
-            if (k == dimZ-1) {val3 = 0.0f;} else {val3 = B[(dimX*dimY)*(k+1) + (j)*dimX+i];}
-
-            gradX = val1 - B[index];
-            gradY = val2 - B[index];
-            gradZ = val3 - B[index];
-            magn = pow(gradX,2) + pow(gradY,2) + pow(gradZ,2);
-            magn = sqrt(magn + pow(eta,2)); /* the eta-smoothed gradients magnitude */
-            B_x[index] = gradX/magn;
-            B_y[index] = gradY/magn;
-            B_z[index] = gradZ/magn;
-        }}}
+            for(i=0; i<dimX; i++) {
+                
+                index = (dimX*dimY)*k + j*dimX+i;
+                
+                /* zero boundary conditions */
+                if (i == dimX-1) {val1 = 0.0f;} else {val1 = B[(dimX*dimY)*k + j*dimX+(i+1)];}
+                if (j == dimY-1) {val2 = 0.0f;} else {val2 = B[(dimX*dimY)*k + (j+1)*dimX+i];}
+                if (k == dimZ-1) {val3 = 0.0f;} else {val3 = B[(dimX*dimY)*(k+1) + (j)*dimX+i];}
+                
+                gradX = val1 - B[index];
+                gradY = val2 - B[index];
+                gradZ = val3 - B[index];
+                magn = pow(gradX,2) + pow(gradY,2) + pow(gradZ,2);
+                magn = sqrt(magn + pow(eta,2)); /* the eta-smoothed gradients magnitude */
+                B_x[index] = gradX/magn;
+                B_y[index] = gradY/magn;
+                B_z[index] = gradZ/magn;
+            }}}
     return 1;
 }
 
@@ -338,15 +339,15 @@ float ProjectVect_func3D(float *R1, float *R2, float *R3, float *B_x, float *B_y
     long i,j,k,index;
     float in_prod;
 #pragma omp parallel for shared(R1, R2, R3, B_x, B_y, B_z) private(index,i,j,k,in_prod)
-    for(i=0; i<dimX; i++) {
+    for(k=0; k<dimZ; k++) {
         for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-			index = (dimX*dimY)*k + j*dimX+i;
-            in_prod = R1[index]*B_x[index] + R2[index]*B_y[index] + R3[index]*B_z[index];   /* calculate inner product */
-            R1[index] = R1[index] - in_prod*B_x[index];
-            R2[index] = R2[index] - in_prod*B_y[index];
-            R3[index] = R3[index] - in_prod*B_z[index];
-        }}}
+            for(i=0; i<dimX; i++) {
+                index = (dimX*dimY)*k + j*dimX+i;
+                in_prod = R1[index]*B_x[index] + R2[index]*B_y[index] + R3[index]*B_z[index];   /* calculate inner product */
+                R1[index] = R1[index] - in_prod*B_x[index];
+                R2[index] = R2[index] - in_prod*B_y[index];
+                R3[index] = R3[index] - in_prod*B_z[index];
+            }}}
     return 1;
 }
 
@@ -355,10 +356,10 @@ float Obj_dfunc3D(float *A, float *D, float *R1, float *R2, float *R3, float lam
     float val1, val2, val3;
     long i,j,k,index;
 #pragma omp parallel for shared(A,D,R1,R2,R3) private(index,i,j,k,val1,val2,val3)
-    for(i=0; i<dimX; i++) {
+    for(k=0; k<dimZ; k++) {
         for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-				index = (dimX*dimY)*k + j*dimX+i;
+            for(i=0; i<dimX; i++) {
+                index = (dimX*dimY)*k + j*dimX+i;
                 /* boundary conditions */
                 if (i == 0) {val1 = 0.0f;} else {val1 = R1[(dimX*dimY)*k + j*dimX + (i-1)];}
                 if (j == 0) {val2 = 0.0f;} else {val2 = R2[(dimX*dimY)*k + (j-1)*dimX + i];}
@@ -373,20 +374,20 @@ float Grad_dfunc3D(float *P1, float *P2, float *P3, float *D, float *R1, float *
     long i,j,k, index;
     multip = (1.0f/(26.0f*lambda));
 #pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3,multip) private(index,i,j,k,val1,val2,val3,in_prod)
-    for(i=0; i<dimX; i++) {
+    for(k=0; k<dimZ; k++) {
         for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-				index = (dimX*dimY)*k + j*dimX+i;
+            for(i=0; i<dimX; i++) {
+                index = (dimX*dimY)*k + j*dimX+i;
                 /* boundary conditions */
                 if (i == dimX-1) val1 = 0.0f; else val1 = D[index] - D[(dimX*dimY)*k + j*dimX + (i+1)];
                 if (j == dimY-1) val2 = 0.0f; else val2 = D[index] - D[(dimX*dimY)*k + (j+1)*dimX + i];
                 if (k == dimZ-1) val3 = 0.0f; else val3 = D[index] - D[(dimX*dimY)*(k+1) + j*dimX + i];
-
+                
                 in_prod = val1*B_x[index] + val2*B_y[index] + val3*B_z[index];   /* calculate inner product */
                 val1 = val1 - in_prod*B_x[index];
                 val2 = val2 - in_prod*B_y[index];
                 val3 = val3 - in_prod*B_z[index];
-
+                
                 P1[index] = R1[index] + multip*val1;
                 P2[index] = R2[index] + multip*val2;
                 P3[index] = R3[index] + multip*val3;
@@ -398,33 +399,33 @@ float Proj_dfunc3D(float *P1, float *P2, float *P3, int methTV, long DimTotal)
     float val1, val2, val3, denom, sq_denom;
     long i;
     if (methTV == 0) {
-	/* isotropic TV*/
-	#pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3,sq_denom)
-    for(i=0; i<DimTotal; i++) {
-				denom = powf(P1[i],2) + powf(P2[i],2) + powf(P3[i],2);
-                if (denom > 1.0f) {
-					sq_denom = 1.0f/sqrtf(denom);
-                    P1[i] = P1[i]*sq_denom;
-                    P2[i] = P2[i]*sq_denom;
-                    P3[i] = P3[i]*sq_denom;
-                }
-			}
-	}
+        /* isotropic TV*/
+#pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3,sq_denom)
+        for(i=0; i<DimTotal; i++) {
+            denom = powf(P1[i],2) + powf(P2[i],2) + powf(P3[i],2);
+            if (denom > 1.0f) {
+                sq_denom = 1.0f/sqrtf(denom);
+                P1[i] = P1[i]*sq_denom;
+                P2[i] = P2[i]*sq_denom;
+                P3[i] = P3[i]*sq_denom;
+            }
+        }
+    }
     else {
-    /* anisotropic TV*/
+        /* anisotropic TV*/
 #pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3)
-    for(i=0; i<DimTotal; i++) {
-                val1 = fabs(P1[i]);
-                val2 = fabs(P2[i]);
-                val3 = fabs(P3[i]);
-                if (val1 < 1.0f) {val1 = 1.0f;}
-                if (val2 < 1.0f) {val2 = 1.0f;}
-                if (val3 < 1.0f) {val3 = 1.0f;}
-                P1[i] = P1[i]/val1;
-                P2[i] = P2[i]/val2;
-                P3[i] = P3[i]/val3;
-            }
-		}
+        for(i=0; i<DimTotal; i++) {
+            val1 = fabs(P1[i]);
+            val2 = fabs(P2[i]);
+            val3 = fabs(P3[i]);
+            if (val1 < 1.0f) {val1 = 1.0f;}
+            if (val2 < 1.0f) {val2 = 1.0f;}
+            if (val3 < 1.0f) {val3 = 1.0f;}
+            P1[i] = P1[i]/val1;
+            P2[i] = P2[i]/val2;
+            P3[i] = P3[i]/val3;
+        }
+    }
     return 1;
 }
 float Rupd_dfunc3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, long DimTotal)
@@ -434,9 +435,9 @@ float Rupd_dfunc3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3
     multip = ((tk-1.0f)/tkp1);
 #pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3,multip) private(i)
     for(i=0; i<DimTotal; i++) {
-                R1[i] = P1[i] + multip*(P1[i] - P1_old[i]);
-                R2[i] = P2[i] + multip*(P2[i] - P2_old[i]);
-                R3[i] = P3[i] + multip*(P3[i] - P3_old[i]);
-            }
+        R1[i] = P1[i] + multip*(P1[i] - P1_old[i]);
+        R2[i] = P2[i] + multip*(P2[i] - P2_old[i]);
+        R3[i] = P3[i] + multip*(P3[i] - P3_old[i]);
+    }
     return 1;
 }
diff --git a/src/Core/regularisers_CPU/LLT_ROF_core.c b/src/Core/regularisers_CPU/LLT_ROF_core.c
index 1064340..e097c90 100644
--- a/src/Core/regularisers_CPU/LLT_ROF_core.c
+++ b/src/Core/regularisers_CPU/LLT_ROF_core.c
@@ -1,21 +1,21 @@
 /*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #include "LLT_ROF_core.h"
 #define EPS_LLT 1.0e-12
@@ -30,56 +30,56 @@ int signLLT(float x) {
 
 /* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
  *
-* This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well.
-* The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase
-* lambdaLLT starting with smaller values.
-*
-* Input Parameters:
-* 1. U0 - original noise image/volume
-* 2. lambdaROF - ROF-related regularisation parameter
-* 3. lambdaLLT - LLT-related regularisation parameter
-* 4. tau - time-marching step
-* 5. iter - iterations number (for both models)
-* 6. eplsilon: tolerance constant
-*
-* Output:
-* [1] Filtered/regularized image/volume
-* [2] Information vector which contains [iteration no., reached tolerance]
-*
-* References:
-* [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
-* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
-*/
+ * This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well.
+ * The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase
+ * lambdaLLT starting with smaller values.
+ *
+ * Input Parameters:
+ * 1. U0 - original noise image/volume
+ * 2. lambdaROF - ROF-related regularisation parameter
+ * 3. lambdaLLT - LLT-related regularisation parameter
+ * 4. tau - time-marching step
+ * 5. iter - iterations number (for both models)
+ * 6. eplsilon: tolerance constant
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ * [2] Information vector which contains [iteration no., reached tolerance]
+ *
+ * References:
+ * [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
+ * [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+ */
 
 float LLT_ROF_CPU_main(float *Input, float *Output, float *infovector, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, float epsil, int dimX, int dimY, int dimZ)
 {
-		long DimTotal;
+    long DimTotal;
     int ll, j;
     float re, re1;
     re = 0.0f; re1 = 0.0f;
     int count = 0;
-
-		float *D1_LLT=NULL, *D2_LLT=NULL, *D3_LLT=NULL, *D1_ROF=NULL, *D2_ROF=NULL, *D3_ROF=NULL, *Output_prev=NULL;
-				DimTotal = (long)(dimX*dimY*dimZ);
-
-        D1_ROF = calloc(DimTotal, sizeof(float));
-        D2_ROF = calloc(DimTotal, sizeof(float));
-        D3_ROF = calloc(DimTotal, sizeof(float));
-
-        D1_LLT = calloc(DimTotal, sizeof(float));
-        D2_LLT = calloc(DimTotal, sizeof(float));
-        D3_LLT = calloc(DimTotal, sizeof(float));
-
-        copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ)); /* initialize  */
-        if (epsil != 0.0f) Output_prev = calloc(DimTotal, sizeof(float));
-
-		for(ll = 0; ll < iterationsNumb; ll++) {
-            if ((epsil != 0.0f) && (ll % 5 == 0)) copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-
-            if (dimZ == 1) {
-			      /* 2D case */
-			      /****************ROF******************/
-			      /* calculate first-order differences */
+    
+    float *D1_LLT=NULL, *D2_LLT=NULL, *D3_LLT=NULL, *D1_ROF=NULL, *D2_ROF=NULL, *D3_ROF=NULL, *Output_prev=NULL;
+    DimTotal = (long)(dimX*dimY*dimZ);
+    
+    D1_ROF = calloc(DimTotal, sizeof(float));
+    D2_ROF = calloc(DimTotal, sizeof(float));
+    D3_ROF = calloc(DimTotal, sizeof(float));
+    
+    D1_LLT = calloc(DimTotal, sizeof(float));
+    D2_LLT = calloc(DimTotal, sizeof(float));
+    D3_LLT = calloc(DimTotal, sizeof(float));
+    
+    copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ)); /* initialize  */
+    if (epsil != 0.0f) Output_prev = calloc(DimTotal, sizeof(float));
+    
+    for(ll = 0; ll < iterationsNumb; ll++) {
+        if ((epsil != 0.0f) && (ll % 5 == 0)) copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+        
+        if (dimZ == 1) {
+            /* 2D case */
+            /****************ROF******************/
+            /* calculate first-order differences */
             D1_func_ROF(Output, D1_ROF, (long)(dimX), (long)(dimY), 1l);
             D2_func_ROF(Output, D2_ROF, (long)(dimX), (long)(dimY), 1l);
             /****************LLT******************/
@@ -87,10 +87,10 @@ float LLT_ROF_CPU_main(float *Input, float *Output, float *infovector, float lam
             der2D_LLT(Output, D1_LLT, D2_LLT, (long)(dimX), (long)(dimY), 1l);
             /* Joint update for ROF and LLT models */
             Update2D_LLT_ROF(Input, Output, D1_LLT, D2_LLT, D1_ROF, D2_ROF, lambdaROF, lambdaLLT, tau, (long)(dimX), (long)(dimY), 1l);
-            }
-            else {
-			      /* 3D case */
-			      /* calculate first-order differences */
+        }
+        else {
+            /* 3D case */
+            /* calculate first-order differences */
             D1_func_ROF(Output, D1_ROF, (long)(dimX), (long)(dimY), (long)(dimZ));
             D2_func_ROF(Output, D2_ROF, (long)(dimX), (long)(dimY), (long)(dimZ));
             D3_func_ROF(Output, D3_ROF, (long)(dimX), (long)(dimY), (long)(dimZ));
@@ -99,30 +99,30 @@ float LLT_ROF_CPU_main(float *Input, float *Output, float *infovector, float lam
             der3D_LLT(Output, D1_LLT, D2_LLT, D3_LLT,(long)(dimX), (long)(dimY), (long)(dimZ));
             /* Joint update for ROF and LLT models */
             Update3D_LLT_ROF(Input, Output, D1_LLT, D2_LLT, D3_LLT, D1_ROF, D2_ROF, D3_ROF, lambdaROF, lambdaLLT, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
-			       }
-
-             /* check early stopping criteria */
-             if ((epsil != 0.0f) && (ll % 5 == 0)) {
-             re = 0.0f; re1 = 0.0f;
-              for(j=0; j<DimTotal; j++)
-              {
-                  re += powf(Output[j] - Output_prev[j],2);
-                  re1 += powf(Output[j],2);
-              }
-               re = sqrtf(re)/sqrtf(re1);
-               if (re < epsil)  count++;
-               if (count > 3) break;
-             }
-
+        }
+        
+        /* check early stopping criteria */
+        if ((epsil != 0.0f) && (ll % 5 == 0)) {
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<DimTotal; j++)
+            {
+                re += powf(Output[j] - Output_prev[j],2);
+                re1 += powf(Output[j],2);
+            }
+            re = sqrtf(re)/sqrtf(re1);
+            if (re < epsil)  count++;
+            if (count > 3) break;
+        }
+        
     } /*end of iterations*/
     free(D1_LLT);free(D2_LLT);free(D3_LLT);
     free(D1_ROF);free(D2_ROF);free(D3_ROF);
     if (epsil != 0.0f) free(Output_prev);
-
+    
     /*adding info into info_vector */
     infovector[0] = (float)(ll);  /*iterations number (if stopped earlier based on tolerance)*/
     infovector[1] = re;  /* reached tolerance */
-	  return 0;
+    return 0;
 }
 
 /*************************************************************************/
@@ -130,65 +130,65 @@ float LLT_ROF_CPU_main(float *Input, float *Output, float *infovector, float lam
 /*************************************************************************/
 float der2D_LLT(float *U, float *D1, float *D2, long dimX, long dimY, long dimZ)
 {
-	long i, j, index, i_p, i_m, j_m, j_p;
-	float dxx, dyy, denom_xx, denom_yy;
+    long i, j, index, i_p, i_m, j_m, j_p;
+    float dxx, dyy, denom_xx, denom_yy;
 #pragma omp parallel for shared(U,D1,D2) private(i, j, index, i_p, i_m, j_m, j_p, denom_xx, denom_yy, dxx, dyy)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			index = j*dimX+i;
-			/* symmetric boundary conditions (Neuman) */
-			i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-			i_m = i - 1; if (i_m < 0) i_m = i + 1;
-			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-			j_m = j - 1; if (j_m < 0) j_m = j + 1;
-
-			dxx = U[j*dimX+i_p] - 2.0f*U[index] + U[j*dimX+i_m];
-			dyy = U[j_p*dimX+i] - 2.0f*U[index] + U[j_m*dimX+i];
-
-			denom_xx = fabs(dxx) + EPS_LLT;
-			denom_yy = fabs(dyy) + EPS_LLT;
-
-			D1[index] = dxx / denom_xx;
-			D2[index] = dyy / denom_yy;
-		}
-	}
-	return 1;
+    for (j = 0; j<dimY; j++) {
+        for (i = 0; i<dimX; i++) {
+            index = j*dimX+i;
+            /* symmetric boundary conditions (Neuman) */
+            i_p = i + 1; if (i_p == dimX) i_p = i - 1;
+            i_m = i - 1; if (i_m < 0) i_m = i + 1;
+            j_p = j + 1; if (j_p == dimY) j_p = j - 1;
+            j_m = j - 1; if (j_m < 0) j_m = j + 1;
+            
+            dxx = U[j*dimX+i_p] - 2.0f*U[index] + U[j*dimX+i_m];
+            dyy = U[j_p*dimX+i] - 2.0f*U[index] + U[j_m*dimX+i];
+            
+            denom_xx = fabs(dxx) + EPS_LLT;
+            denom_yy = fabs(dyy) + EPS_LLT;
+            
+            D1[index] = dxx / denom_xx;
+            D2[index] = dyy / denom_yy;
+        }
+    }
+    return 1;
 }
 
 float der3D_LLT(float *U, float *D1, float *D2, float *D3, long dimX, long dimY, long dimZ)
- {
- 	long i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, index;
- 	float dxx, dyy, dzz, denom_xx, denom_yy, denom_zz;
- #pragma omp parallel for shared(U,D1,D2,D3) private(i, j, index, k, i_p, i_m, j_m, j_p, k_p, k_m, denom_xx, denom_yy, denom_zz, dxx, dyy, dzz)
- 	for (i = 0; i<dimX; i++) {
- 		for (j = 0; j<dimY; j++) {
- 			for (k = 0; k<dimZ; k++) {
-				/* symmetric boundary conditions (Neuman) */
-				i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-				i_m = i - 1; if (i_m < 0) i_m = i + 1;
-				j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-				j_m = j - 1; if (j_m < 0) j_m = j + 1;
- 				k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
- 				k_m = k - 1; if (k_m < 0) k_m = k + 1;
-
-				index = (dimX*dimY)*k + j*dimX+i;
-
- 				dxx = U[(dimX*dimY)*k + j*dimX+i_p] - 2.0f*U[index] + U[(dimX*dimY)*k + j*dimX+i_m];
- 				dyy = U[(dimX*dimY)*k + j_p*dimX+i] - 2.0f*U[index] + U[(dimX*dimY)*k + j_m*dimX+i];
- 				dzz = U[(dimX*dimY)*k_p + j*dimX+i] - 2.0f*U[index] + U[(dimX*dimY)*k_m + j*dimX+i];
-
- 				denom_xx = fabs(dxx) + EPS_LLT;
- 				denom_yy = fabs(dyy) + EPS_LLT;
- 				denom_zz = fabs(dzz) + EPS_LLT;
-
- 				D1[index] = dxx / denom_xx;
- 				D2[index] = dyy / denom_yy;
- 				D3[index] = dzz / denom_zz;
- 			}
- 		}
- 	}
- 	return 1;
- }
+{
+    long i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, index;
+    float dxx, dyy, dzz, denom_xx, denom_yy, denom_zz;
+#pragma omp parallel for shared(U,D1,D2,D3) private(i, j, index, k, i_p, i_m, j_m, j_p, k_p, k_m, denom_xx, denom_yy, denom_zz, dxx, dyy, dzz)
+    for (k = 0; k<dimZ; k++) {
+        for (j = 0; j<dimY; j++) {
+            for (i = 0; i<dimX; i++) {
+                /* symmetric boundary conditions (Neuman) */
+                i_p = i + 1; if (i_p == dimX) i_p = i - 1;
+                i_m = i - 1; if (i_m < 0) i_m = i + 1;
+                j_p = j + 1; if (j_p == dimY) j_p = j - 1;
+                j_m = j - 1; if (j_m < 0) j_m = j + 1;
+                k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
+                k_m = k - 1; if (k_m < 0) k_m = k + 1;
+                
+                index = (dimX*dimY)*k + j*dimX+i;
+                
+                dxx = U[(dimX*dimY)*k + j*dimX+i_p] - 2.0f*U[index] + U[(dimX*dimY)*k + j*dimX+i_m];
+                dyy = U[(dimX*dimY)*k + j_p*dimX+i] - 2.0f*U[index] + U[(dimX*dimY)*k + j_m*dimX+i];
+                dzz = U[(dimX*dimY)*k_p + j*dimX+i] - 2.0f*U[index] + U[(dimX*dimY)*k_m + j*dimX+i];
+                
+                denom_xx = fabs(dxx) + EPS_LLT;
+                denom_yy = fabs(dyy) + EPS_LLT;
+                denom_zz = fabs(dzz) + EPS_LLT;
+                
+                D1[index] = dxx / denom_xx;
+                D2[index] = dyy / denom_yy;
+                D3[index] = dzz / denom_zz;
+            }
+        }
+    }
+    return 1;
+}
 
 /*************************************************************************/
 /**********************ROF-related functions *****************************/
@@ -199,13 +199,13 @@ float D1_func_ROF(float *A, float *D1, long dimX, long dimY, long dimZ)
 {
     float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1;
     long i,j,k,i1,i2,k1,j1,j2,k2,index;
-
+    
     if (dimZ > 1) {
 #pragma omp parallel for shared (A, D1, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1,NOMy_1,NOMy_0,NOMz_1,NOMz_0,denom1,denom2,denom3,T1)
-        for(j=0; j<dimY; j++) {
-            for(i=0; i<dimX; i++) {
-                for(k=0; k<dimZ; k++) {
-					index = (dimX*dimY)*k + j*dimX+i;
+        for (k = 0; k<dimZ; k++) {
+            for (j = 0; j<dimY; j++) {
+                for (i = 0; i<dimX; i++) {
+                    index = (dimX*dimY)*k + j*dimX+i;
                     /* symmetric boundary conditions (Neuman) */
                     i1 = i + 1; if (i1 >= dimX) i1 = i-1;
                     i2 = i - 1; if (i2 < 0) i2 = i+1;
@@ -213,17 +213,17 @@ float D1_func_ROF(float *A, float *D1, long dimX, long dimY, long dimZ)
                     j2 = j - 1; if (j2 < 0) j2 = j+1;
                     k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
                     k2 = k - 1; if (k2 < 0) k2 = k+1;
-
+                    
                     /* Forward-backward differences */
                     NOMx_1 = A[(dimX*dimY)*k + j1*dimX + i] - A[index]; /* x+ */
                     NOMy_1 = A[(dimX*dimY)*k + j*dimX + i1] - A[index]; /* y+ */
                     /*NOMx_0 = (A[(i)*dimY + j] - A[(i2)*dimY + j]); */  /* x- */
                     NOMy_0 = A[index] - A[(dimX*dimY)*k + j*dimX + i2]; /* y- */
-
+                    
                     NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
                     NOMz_0 = A[index] - A[(dimX*dimY)*k2 + j*dimX + i]; /* z- */
-
-
+                    
+                    
                     denom1 = NOMx_1*NOMx_1;
                     denom2 = 0.5f*(signLLT(NOMy_1) + signLLT(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
                     denom2 = denom2*denom2;
@@ -237,19 +237,19 @@ float D1_func_ROF(float *A, float *D1, long dimX, long dimY, long dimZ)
 #pragma omp parallel for shared (A, D1, dimX, dimY) private(i, j, i1, j1, i2, j2,NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1,index)
         for(j=0; j<dimY; j++) {
             for(i=0; i<dimX; i++) {
-				index = j*dimX+i;
+                index = j*dimX+i;
                 /* symmetric boundary conditions (Neuman) */
                 i1 = i + 1; if (i1 >= dimX) i1 = i-1;
                 i2 = i - 1; if (i2 < 0) i2 = i+1;
                 j1 = j + 1; if (j1 >= dimY) j1 = j-1;
                 j2 = j - 1; if (j2 < 0) j2 = j+1;
-
+                
                 /* Forward-backward differences */
                 NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */
                 NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */
                 /*NOMx_0 = (A[(i)*dimY + j] - A[(i2)*dimY + j]); */ /* x- */
                 NOMy_0 = A[index] - A[(j)*dimX + i2]; /* y- */
-
+                
                 denom1 = NOMx_1*NOMx_1;
                 denom2 = 0.5f*(signLLT(NOMy_1) + signLLT(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
                 denom2 = denom2*denom2;
@@ -264,13 +264,13 @@ float D2_func_ROF(float *A, float *D2, long dimX, long dimY, long dimZ)
 {
     float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2;
     long i,j,k,i1,i2,k1,j1,j2,k2,index;
-
+    
     if (dimZ > 1) {
 #pragma omp parallel for shared (A, D2, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2,  NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2)
-        for(j=0; j<dimY; j++) {
-            for(i=0; i<dimX; i++) {
-                for(k=0; k<dimZ; k++) {
-					index = (dimX*dimY)*k + j*dimX+i;
+        for (k = 0; k<dimZ; k++) {
+            for (j = 0; j<dimY; j++) {
+                for (i = 0; i<dimX; i++) {
+                    index = (dimX*dimY)*k + j*dimX+i;
                     /* symmetric boundary conditions (Neuman) */
                     i1 = i + 1; if (i1 >= dimX) i1 = i-1;
                     i2 = i - 1; if (i2 < 0) i2 = i+1;
@@ -278,16 +278,16 @@ float D2_func_ROF(float *A, float *D2, long dimX, long dimY, long dimZ)
                     j2 = j - 1; if (j2 < 0) j2 = j+1;
                     k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
                     k2 = k - 1; if (k2 < 0) k2 = k+1;
-
-
+                    
+                    
                     /* Forward-backward differences */
                     NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* x+ */
                     NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* y+ */
                     NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
                     NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
                     NOMz_0 = A[index] - A[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */
-
-
+                    
+                    
                     denom1 = NOMy_1*NOMy_1;
                     denom2 = 0.5f*(signLLT(NOMx_1) + signLLT(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
                     denom2 = denom2*denom2;
@@ -301,19 +301,19 @@ float D2_func_ROF(float *A, float *D2, long dimX, long dimY, long dimZ)
 #pragma omp parallel for shared (A, D2, dimX, dimY) private(i, j, i1, j1, i2, j2, NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2,index)
         for(j=0; j<dimY; j++) {
             for(i=0; i<dimX; i++) {
-				index = j*dimX+i;
+                index = j*dimX+i;
                 /* symmetric boundary conditions (Neuman) */
                 i1 = i + 1; if (i1 >= dimX) i1 = i-1;
                 i2 = i - 1; if (i2 < 0) i2 = i+1;
                 j1 = j + 1; if (j1 >= dimY) j1 = j-1;
                 j2 = j - 1; if (j2 < 0) j2 = j+1;
-
+                
                 /* Forward-backward differences */
                 NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */
                 NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */
                 NOMx_0 = A[index] - A[j2*dimX + i]; /* x- */
                 /*NOMy_0 = A[(i)*dimY + j] - A[(i)*dimY + j2]; */  /* y- */
-
+                
                 denom1 = NOMy_1*NOMy_1;
                 denom2 = 0.5f*(signLLT(NOMx_1) + signLLT(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
                 denom2 = denom2*denom2;
@@ -329,12 +329,12 @@ float D3_func_ROF(float *A, float *D3, long dimX, long dimY, long dimZ)
 {
     float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3;
     long index,i,j,k,i1,i2,k1,j1,j2,k2;
-
+    
 #pragma omp parallel for shared (A, D3, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2,  NOMx_1, NOMy_1, NOMy_0, NOMx_0, NOMz_1, denom1, denom2, denom3, T3)
-    for(j=0; j<dimY; j++) {
-        for(i=0; i<dimX; i++) {
-            for(k=0; k<dimZ; k++) {
-				index = (dimX*dimY)*k + j*dimX+i;
+    for (k = 0; k<dimZ; k++) {
+        for (j = 0; j<dimY; j++) {
+            for (i = 0; i<dimX; i++) {
+                index = (dimX*dimY)*k + j*dimX+i;
                 /* symmetric boundary conditions (Neuman) */
                 i1 = i + 1; if (i1 >= dimX) i1 = i-1;
                 i2 = i - 1; if (i2 < 0) i2 = i+1;
@@ -342,7 +342,7 @@ float D3_func_ROF(float *A, float *D3, long dimX, long dimY, long dimZ)
                 j2 = j - 1; if (j2 < 0) j2 = j+1;
                 k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
                 k2 = k - 1; if (k2 < 0) k2 = k+1;
-
+                
                 /* Forward-backward differences */
                 NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* x+ */
                 NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* y+ */
@@ -350,7 +350,7 @@ float D3_func_ROF(float *A, float *D3, long dimX, long dimY, long dimZ)
                 NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
                 NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
                 /*NOMz_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k2 + (i)*dimY + j]; */ /* z- */
-
+                
                 denom1 = NOMz_1*NOMz_1;
                 denom2 = 0.5f*(signLLT(NOMx_1) + signLLT(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
                 denom2 = denom2*denom2;
@@ -368,69 +368,69 @@ float D3_func_ROF(float *A, float *D3, long dimX, long dimY, long dimZ)
 
 float Update2D_LLT_ROF(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D1_ROF, float *D2_ROF, float lambdaROF, float lambdaLLT, float tau, long dimX, long dimY, long dimZ)
 {
-	long i, j, index, i_p, i_m, j_m, j_p;
-	float div, laplc, dxx, dyy, dv1, dv2;
+    long i, j, index, i_p, i_m, j_m, j_p;
+    float div, laplc, dxx, dyy, dv1, dv2;
 #pragma omp parallel for shared(U,U0) private(i, j, index, i_p, i_m, j_m, j_p, laplc, div, dxx, dyy, dv1, dv2)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			index = j*dimX+i;
-			/* symmetric boundary conditions (Neuman) */
-			i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-			i_m = i - 1; if (i_m < 0) i_m = i + 1;
-			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-			j_m = j - 1; if (j_m < 0) j_m = j + 1;
-
-			/*LLT-related part*/
-			dxx = D1_LLT[j*dimX+i_p] - 2.0f*D1_LLT[index] + D1_LLT[j*dimX+i_m];
-			dyy = D2_LLT[j_p*dimX+i] - 2.0f*D2_LLT[index] + D2_LLT[j_m*dimX+i];
-			laplc = dxx + dyy; /*build Laplacian*/
-
-			/*ROF-related part*/
-			dv1 = D1_ROF[index] - D1_ROF[j_m*dimX + i];
+    for (j = 0; j<dimY; j++) {
+        for (i = 0; i<dimX; i++) {
+            index = j*dimX+i;
+            /* symmetric boundary conditions (Neuman) */
+            i_p = i + 1; if (i_p == dimX) i_p = i - 1;
+            i_m = i - 1; if (i_m < 0) i_m = i + 1;
+            j_p = j + 1; if (j_p == dimY) j_p = j - 1;
+            j_m = j - 1; if (j_m < 0) j_m = j + 1;
+            
+            /*LLT-related part*/
+            dxx = D1_LLT[j*dimX+i_p] - 2.0f*D1_LLT[index] + D1_LLT[j*dimX+i_m];
+            dyy = D2_LLT[j_p*dimX+i] - 2.0f*D2_LLT[index] + D2_LLT[j_m*dimX+i];
+            laplc = dxx + dyy; /*build Laplacian*/
+            
+            /*ROF-related part*/
+            dv1 = D1_ROF[index] - D1_ROF[j_m*dimX + i];
             dv2 = D2_ROF[index] - D2_ROF[j*dimX + i_m];
-			div = dv1 + dv2; /*build Divirgent*/
-
-			/*combine all into one cost function to minimise */
+            div = dv1 + dv2; /*build Divirgent*/
+            
+            /*combine all into one cost function to minimise */
             U[index] += tau*(lambdaROF*(div) - lambdaLLT*(laplc) - (U[index] - U0[index]));
-		}
-	}
-	return *U;
+        }
+    }
+    return *U;
 }
 
 float Update3D_LLT_ROF(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D3_LLT, float *D1_ROF, float *D2_ROF, float *D3_ROF, float lambdaROF, float lambdaLLT, float tau, long dimX, long dimY, long dimZ)
 {
-	long i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, index;
-	float div, laplc, dxx, dyy, dzz, dv1, dv2, dv3;
+    long i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, index;
+    float div, laplc, dxx, dyy, dzz, dv1, dv2, dv3;
 #pragma omp parallel for shared(U,U0) private(i, j, k, index, i_p, i_m, j_m, j_p, k_p, k_m, laplc, div, dxx, dyy, dzz, dv1, dv2, dv3)
- 	for (i = 0; i<dimX; i++) {
- 		for (j = 0; j<dimY; j++) {
- 			for (k = 0; k<dimZ; k++) {
-				/* symmetric boundary conditions (Neuman) */
-				i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-				i_m = i - 1; if (i_m < 0) i_m = i + 1;
-				j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-				j_m = j - 1; if (j_m < 0) j_m = j + 1;
- 				k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
- 				k_m = k - 1; if (k_m < 0) k_m = k + 1;
-
-				index = (dimX*dimY)*k + j*dimX+i;
-
-				/*LLT-related part*/
-				dxx = D1_LLT[(dimX*dimY)*k + j*dimX+i_p] - 2.0f*D1_LLT[index] + D1_LLT[(dimX*dimY)*k + j*dimX+i_m];
-				dyy = D2_LLT[(dimX*dimY)*k + j_p*dimX+i] - 2.0f*D2_LLT[index] + D2_LLT[(dimX*dimY)*k + j_m*dimX+i];
-				dzz = D3_LLT[(dimX*dimY)*k_p + j*dimX+i] - 2.0f*D3_LLT[index] + D3_LLT[(dimX*dimY)*k_m + j*dimX+i];
-				laplc = dxx + dyy + dzz; /*build Laplacian*/
-
-				/*ROF-related part*/
-				dv1 = D1_ROF[index] - D1_ROF[(dimX*dimY)*k + j_m*dimX+i];
-				dv2 = D2_ROF[index] - D2_ROF[(dimX*dimY)*k + j*dimX+i_m];
-				dv3 = D3_ROF[index] - D3_ROF[(dimX*dimY)*k_m + j*dimX+i];
-				div = dv1 + dv2 + dv3; /*build Divirgent*/
-
-				/*combine all into one cost function to minimise */
-				U[index] += tau*(lambdaROF*(div) - lambdaLLT*(laplc) - (U[index] - U0[index]));
-			}
-		}
-	}
-	return *U;
+    for (k = 0; k<dimZ; k++) {
+        for (j = 0; j<dimY; j++) {
+            for (i = 0; i<dimX; i++) {
+                /* symmetric boundary conditions (Neuman) */
+                i_p = i + 1; if (i_p == dimX) i_p = i - 1;
+                i_m = i - 1; if (i_m < 0) i_m = i + 1;
+                j_p = j + 1; if (j_p == dimY) j_p = j - 1;
+                j_m = j - 1; if (j_m < 0) j_m = j + 1;
+                k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
+                k_m = k - 1; if (k_m < 0) k_m = k + 1;
+                
+                index = (dimX*dimY)*k + j*dimX+i;
+                
+                /*LLT-related part*/
+                dxx = D1_LLT[(dimX*dimY)*k + j*dimX+i_p] - 2.0f*D1_LLT[index] + D1_LLT[(dimX*dimY)*k + j*dimX+i_m];
+                dyy = D2_LLT[(dimX*dimY)*k + j_p*dimX+i] - 2.0f*D2_LLT[index] + D2_LLT[(dimX*dimY)*k + j_m*dimX+i];
+                dzz = D3_LLT[(dimX*dimY)*k_p + j*dimX+i] - 2.0f*D3_LLT[index] + D3_LLT[(dimX*dimY)*k_m + j*dimX+i];
+                laplc = dxx + dyy + dzz; /*build Laplacian*/
+                
+                /*ROF-related part*/
+                dv1 = D1_ROF[index] - D1_ROF[(dimX*dimY)*k + j_m*dimX+i];
+                dv2 = D2_ROF[index] - D2_ROF[(dimX*dimY)*k + j*dimX+i_m];
+                dv3 = D3_ROF[index] - D3_ROF[(dimX*dimY)*k_m + j*dimX+i];
+                div = dv1 + dv2 + dv3; /*build Divirgent*/
+                
+                /*combine all into one cost function to minimise */
+                U[index] += tau*(lambdaROF*(div) - lambdaLLT*(laplc) - (U[index] - U0[index]));
+            }
+        }
+    }
+    return *U;
 }
diff --git a/src/Core/regularisers_CPU/Nonlocal_TV_core.c b/src/Core/regularisers_CPU/Nonlocal_TV_core.c
index de1c173..7b1368d 100644
--- a/src/Core/regularisers_CPU/Nonlocal_TV_core.c
+++ b/src/Core/regularisers_CPU/Nonlocal_TV_core.c
@@ -34,52 +34,52 @@
  * 5. Weights_ij(k) - associated weights
  * 6. regularisation parameter
  * 7. iterations number
-
+ *
  * Output:
  * 1. denoised image/volume
  * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060.
-
+ *
  */
 /*****************************************************************************/
 
 float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb, int switchM)
 {
-
+    
     long i, j, k;
     int iter;
     lambdaReg = 1.0f/lambdaReg;
-
+    
     /*****2D INPUT *****/
     if (dimZ == 0) {
-	  copyIm(A_orig, Output, (long)(dimX), (long)(dimY), 1l);
-    /* for each pixel store indeces of the most similar neighbours (patches) */
-     for(iter=0; iter<IterNumb; iter++) {
+        copyIm(A_orig, Output, (long)(dimX), (long)(dimY), 1l);
+        /* for each pixel store indeces of the most similar neighbours (patches) */
+        for(iter=0; iter<IterNumb; iter++) {
 #pragma omp parallel for shared (A_orig, Output, Weights, H_i, H_j, iter) private(i,j)
-   for(j=0; j<(long)(dimY); j++) {
-      for(i=0; i<(long)(dimX); i++) {
-             /*NLM_H1_2D(Output, A_orig, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), NumNeighb, lambdaReg);*/  /* NLM - H1 penalty */
-             if (switchM == 1) {
-               NLM_TV_2D(Output, A_orig, H_j, H_i, Weights, i, j, (long)(dimX), (long)(dimY), NumNeighb, lambdaReg);  /* NLM - TV penalty */
-             }
-             else {
-               NLM_TV_2D(Output, A_orig, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), NumNeighb, lambdaReg);  /* NLM - TV penalty */
-             }
-           }}
-          }
+            for(j=0; j<(long)(dimY); j++) {
+                for(i=0; i<(long)(dimX); i++) {
+                    /*NLM_H1_2D(Output, A_orig, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), NumNeighb, lambdaReg);*/  /* NLM - H1 penalty */
+                    if (switchM == 1) {
+                        NLM_TV_2D(Output, A_orig, H_j, H_i, Weights, i, j, (long)(dimX), (long)(dimY), NumNeighb, lambdaReg);  /* NLM - TV penalty */
+                    }
+                    else {
+                        NLM_TV_2D(Output, A_orig, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), NumNeighb, lambdaReg);  /* NLM - TV penalty */
+                    }
+                }}
+        }
     }
     else {
-     /*****3D INPUT *****/
+        /*****3D INPUT *****/
         copyIm(A_orig, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
-    /* for each pixel store indeces of the most similar neighbours (patches) */
-     for(iter=0; iter<IterNumb; iter++) {
+        /* for each pixel store indeces of the most similar neighbours (patches) */
+        for(iter=0; iter<IterNumb; iter++) {
 #pragma omp parallel for shared (A_orig, Output, Weights, H_i, H_j, H_k, iter) private(i,j,k)
-      for(i=0; i<(long)(dimX); i++) {
-            for(j=0; j<(long)(dimY); j++) {
-               for(k=0; k<(long)(dimZ); k++) {
-            /* NLM_H1_3D(Output, A_orig, H_i, H_j, H_k, Weights, i, j, k, dimX, dimY, dimZ, NumNeighb, lambdaReg); */ /* NLM - H1 penalty */
-            NLM_TV_3D(Output, A_orig, H_i, H_j, H_k, Weights, i, j, k, (long)(dimX), (long)(dimY), (long)(dimZ), NumNeighb, lambdaReg);   /* NLM - TV penalty */
-           }}}
-          }
+            for(k=0; k<(long)(dimZ); k++) {
+                for(j=0; j<(long)(dimY); j++) {
+                    for(i=0; i<(long)(dimX); i++) {
+                        /* NLM_H1_3D(Output, A_orig, H_i, H_j, H_k, Weights, i, j, k, dimX, dimY, dimZ, NumNeighb, lambdaReg); */ /* NLM - H1 penalty */
+                        NLM_TV_3D(Output, A_orig, H_i, H_j, H_k, Weights, i, j, k, (long)(dimX), (long)(dimY), (long)(dimZ), NumNeighb, lambdaReg);   /* NLM - TV penalty */
+                    }}}
+        }
     }
     return *Output;
 }
@@ -87,61 +87,60 @@ float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, un
 /***********<<<<Main Function for NLM - H1 penalty>>>>**********/
 float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg)
 {
-	long x, i1, j1, index, index_m;
-	float value = 0.0f, normweight  = 0.0f;
-
-	index_m = j*dimX+i;
-	for(x=0; x < NumNeighb; x++) {
-	index =  (dimX*dimY*x) + j*dimX+i;
-		i1 = H_i[index];
-		j1 = H_j[index];
-		value += A[j1*dimX+i1]*Weights[index];
-		normweight += Weights[index];
-	}
-	 A[index_m] = (lambdaReg*A_orig[index_m] + value)/(lambdaReg + normweight);
+    long x, i1, j1, index, index_m;
+    float value = 0.0f, normweight  = 0.0f;
+    
+    index_m = j*dimX+i;
+    for(x=0; x < NumNeighb; x++) {
+        index =  (dimX*dimY*x) + j*dimX+i;
+        i1 = H_i[index];
+        j1 = H_j[index];
+        value += A[j1*dimX+i1]*Weights[index];
+        normweight += Weights[index];
+    }
+    A[index_m] = (lambdaReg*A_orig[index_m] + value)/(lambdaReg + normweight);
     return *A;
 }
 /*3D version*/
 float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg)
 {
-	long x, i1, j1, k1, index;
-	float value = 0.0f, normweight  = 0.0f;
-
-	for(x=0; x < NumNeighb; x++) {
-	index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
-		i1 = H_i[index];
-		j1 = H_j[index];
-		k1 = H_k[index];
-		value += A[(dimX*dimY*k1) + j1*dimX+i1]*Weights[index];
-		normweight += Weights[index];
-	}
+    long x, i1, j1, k1, index;
+    float value = 0.0f, normweight  = 0.0f;
+    
+    for(x=0; x < NumNeighb; x++) {
+        index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
+        i1 = H_i[index];
+        j1 = H_j[index];
+        k1 = H_k[index];
+        value += A[(dimX*dimY*k1) + j1*dimX+i1]*Weights[index];
+        normweight += Weights[index];
+    }
     A[(dimX*dimY*k) + j*dimX+i] = (lambdaReg*A_orig[(dimX*dimY*k) + j*dimX+i] + value)/(lambdaReg + normweight);
     return *A;
 }
 
-
 /***********<<<<Main Function for NLM - TV penalty>>>>**********/
 float NLM_TV_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg)
 {
-	long x, i1, j1, index, index_m;
-	float value = 0.0f, normweight  = 0.0f, NLgrad_magn = 0.0f, NLCoeff;
-
-	 index_m = j*dimX+i;
-
-	for(x=0; x < NumNeighb; x++) {
-		index =  (dimX*dimY*x) + j*dimX+i; /*c*/
-		i1 = H_i[index];
-		j1 = H_j[index];
-		NLgrad_magn += powf((A[j1*dimX+i1] - A[index_m]),2)*Weights[index];
-	}
-
+    long x, i1, j1, index, index_m;
+    float value = 0.0f, normweight  = 0.0f, NLgrad_magn = 0.0f, NLCoeff;
+    
+    index_m = j*dimX+i;
+    
+    for(x=0; x < NumNeighb; x++) {
+        index =  (dimX*dimY*x) + j*dimX+i; /*c*/
+        i1 = H_i[index];
+        j1 = H_j[index];
+        NLgrad_magn += powf((A[j1*dimX+i1] - A[index_m]),2)*Weights[index];
+    }
+    
     NLgrad_magn = sqrtf(NLgrad_magn); /*Non Local Gradients Magnitude */
     NLCoeff = 2.0f*(1.0f/(NLgrad_magn + EPS));
-
+    
     for(x=0; x < NumNeighb; x++) {
-	index =  (dimX*dimY*x) + j*dimX+i; /*c*/
-	i1 = H_i[index];
-	j1 = H_j[index];
+        index =  (dimX*dimY*x) + j*dimX+i; /*c*/
+        i1 = H_i[index];
+        j1 = H_j[index];
         value += A[j1*dimX+i1]*NLCoeff*Weights[index];
         normweight += Weights[index]*NLCoeff;
     }
@@ -151,25 +150,25 @@ float NLM_TV_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_
 /*3D version*/
 float NLM_TV_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg)
 {
-	long x, i1, j1, k1, index;
-	float value = 0.0f, normweight  = 0.0f, NLgrad_magn = 0.0f, NLCoeff;
-
-	for(x=0; x < NumNeighb; x++) {
-	index =  dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
-		i1 = H_i[index];
-		j1 = H_j[index];
-		k1 = H_k[index];
-	        NLgrad_magn += powf((A[(dimX*dimY*k1) + j1*dimX+i1] - A[(dimX*dimY*k1) + j*dimX+i]),2)*Weights[index];
-	}
-
+    long x, i1, j1, k1, index;
+    float value = 0.0f, normweight  = 0.0f, NLgrad_magn = 0.0f, NLCoeff;
+    
+    for(x=0; x < NumNeighb; x++) {
+        index =  dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
+        i1 = H_i[index];
+        j1 = H_j[index];
+        k1 = H_k[index];
+        NLgrad_magn += powf((A[(dimX*dimY*k1) + j1*dimX+i1] - A[(dimX*dimY*k1) + j*dimX+i]),2)*Weights[index];
+    }
+    
     NLgrad_magn = sqrtf(NLgrad_magn); /*Non Local Gradients Magnitude */
     NLCoeff = 2.0f*(1.0f/(NLgrad_magn + EPS));
-
+    
     for(x=0; x < NumNeighb; x++) {
-	index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
-	i1 = H_i[index];
-	j1 = H_j[index];
-	k1 = H_k[index];
+        index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
+        i1 = H_i[index];
+        j1 = H_j[index];
+        k1 = H_k[index];
         value += A[(dimX*dimY*k1) + j1*dimX+i1]*NLCoeff*Weights[index];
         normweight += Weights[index]*NLCoeff;
     }
diff --git a/src/Core/regularisers_CPU/PatchSelect_core.c b/src/Core/regularisers_CPU/PatchSelect_core.c
index 8581797..d80353b 100644
--- a/src/Core/regularisers_CPU/PatchSelect_core.c
+++ b/src/Core/regularisers_CPU/PatchSelect_core.c
@@ -77,13 +77,13 @@ float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, u
             }} /*main neighb loop */
         /* for each pixel store indeces of the most similar neighbours (patches) */
 #pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j)
-    for(i=0; i<(long)(dimX); i++) {
-          for(j=0; j<(long)(dimY); j++) {
+        for(j=0; j<(long)(dimY); j++) {
+            for(i=0; i<(long)(dimX); i++) {
                 Indeces2D(A, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
             }}
     }
     else {
-    /****************3D INPUT ***************/
+        /****************3D INPUT ***************/
         /* generate a 3D Gaussian kernel for NLM procedure */
         Eucl_Vec = (float*) calloc ((2*SimilarWin+1)*(2*SimilarWin+1)*(2*SimilarWin+1),sizeof(float));
         counterG = 0;
@@ -93,12 +93,12 @@ float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, u
                     Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2) + pow(((float) k), 2))/(2*SimilarWin*SimilarWin*SimilarWin));
                     counterG++;
                 }}} /*main neighb loop */
-
+        
         /* for each voxel store indeces of the most similar neighbours (patches) */
 #pragma omp parallel for shared (A, Weights, H_i, H_j, H_k) private(i,j,k)
-        for(i=0; i<dimX; i++) {
+        for(k=0; k<dimZ; k++) {
             for(j=0; j<dimY; j++) {
-                for(k=0; k<dimZ; k++) {
+                for(i=0; i<dimX; i++) {
                     Indeces3D(A, H_i, H_j, H_k, Weights, i, j, k, (long)(dimX), (long)(dimY), (long)(dimZ), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
                 }}}
     }
@@ -111,13 +111,13 @@ float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *W
     long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, index, sizeWin_tot, counterG;
     float *Weight_Vec, normsum;
     unsigned short *ind_i, *ind_j;
-
+    
     sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1);
-
+    
     Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
     ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
     ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-
+    
     counter = 0;
     for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
         for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
@@ -149,15 +149,15 @@ float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *W
     /* do sorting to choose the most prominent weights [HIGH to LOW] */
     /* and re-arrange indeces accordingly */
     for (x = 0; x < counter-1; x++)  {
-       for (y = 0; y < counter-x-1; y++)  {
-           if (Weight_Vec[y] < Weight_Vec[y+1]) {
-            swap(&Weight_Vec[y], &Weight_Vec[y+1]);
-            swapUS(&ind_i[y], &ind_i[y+1]);
-            swapUS(&ind_j[y], &ind_j[y+1]);
+        for (y = 0; y < counter-x-1; y++)  {
+            if (Weight_Vec[y] < Weight_Vec[y+1]) {
+                swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+                swapUS(&ind_i[y], &ind_i[y+1]);
+                swapUS(&ind_j[y], &ind_j[y+1]);
             }
-    	}
+        }
     }
-     /*sorting loop finished*/
+    /*sorting loop finished*/
     /*now select the NumNeighb more prominent weights and store into pre-allocated arrays */
     for(x=0; x < NumNeighb; x++) {
         index = (dimX*dimY*x) + j*dimX+i;
@@ -176,14 +176,14 @@ float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned
     long i1, j1, k1, i_m, j_m, k_m, i_c, j_c, k_c, i2, j2, k2, i3, j3, k3, counter, x, y, index, sizeWin_tot, counterG;
     float *Weight_Vec, normsum, temp;
     unsigned short *ind_i, *ind_j, *ind_k, temp_i, temp_j, temp_k;
-
+    
     sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1)*(2*SearchWindow + 1);
-
+    
     Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
     ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
     ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
     ind_k = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-
+    
     counter = 0l;
     for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
         for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
@@ -237,18 +237,18 @@ float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned
                 ind_k[y] = temp_k;
             }}}
     /*sorting loop finished*/
-
+    
     /*now select the NumNeighb more prominent weights and store into arrays */
     for(x=0; x < NumNeighb; x++) {
         index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
-
+        
         H_i[index] = ind_i[x];
         H_j[index] = ind_j[x];
         H_k[index] = ind_k[x];
-
+        
         Weights[index] = Weight_Vec[x];
     }
-
+    
     free(ind_i);
     free(ind_j);
     free(ind_k);
diff --git a/src/Core/regularisers_CPU/ROF_TV_core.c b/src/Core/regularisers_CPU/ROF_TV_core.c
index 004a0f2..cf08652 100644
--- a/src/Core/regularisers_CPU/ROF_TV_core.c
+++ b/src/Core/regularisers_CPU/ROF_TV_core.c
@@ -56,46 +56,46 @@ float TV_ROF_CPU_main(float *Input, float *Output, float *infovector, float *lam
     int i;
     long DimTotal,j;
     DimTotal = (long)(dimX*dimY*dimZ);
-
+    
     D1 = calloc(DimTotal, sizeof(float));
     D2 = calloc(DimTotal, sizeof(float));
     D3 = calloc(DimTotal, sizeof(float));
-
+    
     /* copy into output */
     copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
     if (epsil != 0.0f) Output_prev = calloc(DimTotal, sizeof(float));
-
+    
     /* start TV iterations */
     for(i=0; i < iterationsNumb; i++) {
-            if ((epsil != 0.0f) && (i % 5 == 0)) copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-
-            /* calculate differences */
-            D1_func(Output, D1, (long)(dimX), (long)(dimY), (long)(dimZ));
-            D2_func(Output, D2, (long)(dimX), (long)(dimY), (long)(dimZ));
-            if (dimZ > 1) D3_func(Output, D3, (long)(dimX), (long)(dimY), (long)(dimZ));
-            TV_kernel(D1, D2, D3, Output, Input, lambdaPar, lambda_is_arr, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
-
-            /* check early stopping criteria */
-            if ((epsil != 0.0f) && (i % 5 == 0)) {
+        if ((epsil != 0.0f) && (i % 5 == 0)) copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+        
+        /* calculate differences */
+        D1_func(Output, D1, (long)(dimX), (long)(dimY), (long)(dimZ));
+        D2_func(Output, D2, (long)(dimX), (long)(dimY), (long)(dimZ));
+        if (dimZ > 1) D3_func(Output, D3, (long)(dimX), (long)(dimY), (long)(dimZ));
+        TV_kernel(D1, D2, D3, Output, Input, lambdaPar, lambda_is_arr, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
+        
+        /* check early stopping criteria */
+        if ((epsil != 0.0f) && (i % 5 == 0)) {
             re = 0.0f; re1 = 0.0f;
-	            for(j=0; j<DimTotal; j++)
-        	    {
-        	        re += powf(Output[j] - Output_prev[j],2);
-        	        re1 += powf(Output[j],2);
-        	    }
-              re = sqrtf(re)/sqrtf(re1);
-              if (re < epsil)  count++;
-              if (count > 3) break;
+            for(j=0; j<DimTotal; j++)
+            {
+                re += powf(Output[j] - Output_prev[j],2);
+                re1 += powf(Output[j],2);
             }
-		}
+            re = sqrtf(re)/sqrtf(re1);
+            if (re < epsil)  count++;
+            if (count > 3) break;
+        }
+    }
     free(D1);free(D2); free(D3);
     if (epsil != 0.0f) free(Output_prev);
-
+    
     /*adding info into info_vector */
     infovector[0] = (float)(i);  /*iterations number (if stopped earlier based on tolerance)*/
     infovector[1] = re;  /* reached tolerance */
-
-	return 0;
+    
+    return 0;
 }
 
 /* calculate differences 1 */
@@ -103,13 +103,13 @@ float D1_func(float *A, float *D1, long dimX, long dimY, long dimZ)
 {
     float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1;
     long i,j,k,i1,i2,k1,j1,j2,k2,index;
-
+    
     if (dimZ > 1) {
 #pragma omp parallel for shared (A, D1, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1,NOMy_1,NOMy_0,NOMz_1,NOMz_0,denom1,denom2,denom3,T1)
-        for(j=0; j<dimY; j++) {
-            for(i=0; i<dimX; i++) {
-                for(k=0; k<dimZ; k++) {
-					index = (dimX*dimY)*k + j*dimX+i;
+        for(k=0; k<dimZ; k++) {
+            for(j=0; j<dimY; j++) {
+                for(i=0; i<dimX; i++) {
+                    index = (dimX*dimY)*k + j*dimX+i;
                     /* symmetric boundary conditions (Neuman) */
                     i1 = i + 1; if (i1 >= dimX) i1 = i-1;
                     i2 = i - 1; if (i2 < 0) i2 = i+1;
@@ -117,17 +117,17 @@ float D1_func(float *A, float *D1, long dimX, long dimY, long dimZ)
                     j2 = j - 1; if (j2 < 0) j2 = j+1;
                     k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
                     k2 = k - 1; if (k2 < 0) k2 = k+1;
-
+                    
                     /* Forward-backward differences */
                     NOMx_1 = A[(dimX*dimY)*k + j1*dimX + i] - A[index]; /* x+ */
                     NOMy_1 = A[(dimX*dimY)*k + j*dimX + i1] - A[index]; /* y+ */
                     /*NOMx_0 = (A[(i)*dimY + j] - A[(i2)*dimY + j]); */  /* x- */
                     NOMy_0 = A[index] - A[(dimX*dimY)*k + j*dimX + i2]; /* y- */
-
+                    
                     NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
                     NOMz_0 = A[index] - A[(dimX*dimY)*k2 + j*dimX + i]; /* z- */
-
-
+                    
+                    
                     denom1 = NOMx_1*NOMx_1;
                     denom2 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
                     denom2 = denom2*denom2;
@@ -141,19 +141,19 @@ float D1_func(float *A, float *D1, long dimX, long dimY, long dimZ)
 #pragma omp parallel for shared (A, D1, dimX, dimY) private(i, j, i1, j1, i2, j2,NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1,index)
         for(j=0; j<dimY; j++) {
             for(i=0; i<dimX; i++) {
-				index = j*dimX+i;
+                index = j*dimX+i;
                 /* symmetric boundary conditions (Neuman) */
                 i1 = i + 1; if (i1 >= dimX) i1 = i-1;
                 i2 = i - 1; if (i2 < 0) i2 = i+1;
                 j1 = j + 1; if (j1 >= dimY) j1 = j-1;
                 j2 = j - 1; if (j2 < 0) j2 = j+1;
-
+                
                 /* Forward-backward differences */
                 NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */
                 NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */
                 /*NOMx_0 = (A[(i)*dimY + j] - A[(i2)*dimY + j]); */ /* x- */
                 NOMy_0 = A[index] - A[(j)*dimX + i2]; /* y- */
-
+                
                 denom1 = NOMx_1*NOMx_1;
                 denom2 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
                 denom2 = denom2*denom2;
@@ -168,12 +168,12 @@ float D2_func(float *A, float *D2, long dimX, long dimY, long dimZ)
 {
     float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2;
     long i,j,k,i1,i2,k1,j1,j2,k2,index;
-
+    
     if (dimZ > 1) {
 #pragma omp parallel for shared (A, D2, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2,  NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2)
-        for(j=0; j<dimY; j++) {
-            for(i=0; i<dimX; i++) {
-                for(k=0; k<dimZ; k++) {
+        for(k=0; k<dimZ; k++) {
+            for(j=0; j<dimY; j++) {
+                for(i=0; i<dimX; i++) {
                     index = (dimX*dimY)*k + j*dimX+i;
                     /* symmetric boundary conditions (Neuman) */
                     i1 = i + 1; if (i1 >= dimX) i1 = i-1;
@@ -182,15 +182,15 @@ float D2_func(float *A, float *D2, long dimX, long dimY, long dimZ)
                     j2 = j - 1; if (j2 < 0) j2 = j+1;
                     k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
                     k2 = k - 1; if (k2 < 0) k2 = k+1;
-
+                    
                     /* Forward-backward differences */
                     NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* x+ */
                     NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* y+ */
                     NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
                     NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
                     NOMz_0 = A[index] - A[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */
-
-
+                    
+                    
                     denom1 = NOMy_1*NOMy_1;
                     denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
                     denom2 = denom2*denom2;
@@ -204,19 +204,19 @@ float D2_func(float *A, float *D2, long dimX, long dimY, long dimZ)
 #pragma omp parallel for shared (A, D2, dimX, dimY) private(i, j, i1, j1, i2, j2, NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2,index)
         for(j=0; j<dimY; j++) {
             for(i=0; i<dimX; i++) {
-		index = j*dimX+i;
+                index = j*dimX+i;
                 /* symmetric boundary conditions (Neuman) */
                 i1 = i + 1; if (i1 >= dimX) i1 = i-1;
                 i2 = i - 1; if (i2 < 0) i2 = i+1;
                 j1 = j + 1; if (j1 >= dimY) j1 = j-1;
                 j2 = j - 1; if (j2 < 0) j2 = j+1;
-
+                
                 /* Forward-backward differences */
                 NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */
                 NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */
                 NOMx_0 = A[index] - A[j2*dimX + i]; /* x- */
                 /*NOMy_0 = A[(i)*dimY + j] - A[(i)*dimY + j2]; */  /* y- */
-
+                
                 denom1 = NOMy_1*NOMy_1;
                 denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
                 denom2 = denom2*denom2;
@@ -232,12 +232,12 @@ float D3_func(float *A, float *D3, long dimX, long dimY, long dimZ)
 {
     float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3;
     long index,i,j,k,i1,i2,k1,j1,j2,k2;
-
+    
 #pragma omp parallel for shared (A, D3, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2,  NOMx_1, NOMy_1, NOMy_0, NOMx_0, NOMz_1, denom1, denom2, denom3, T3)
-    for(j=0; j<dimY; j++) {
-        for(i=0; i<dimX; i++) {
-            for(k=0; k<dimZ; k++) {
-				index = (dimX*dimY)*k + j*dimX+i;
+    for(k=0; k<dimZ; k++) {
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                index = (dimX*dimY)*k + j*dimX+i;
                 /* symmetric boundary conditions (Neuman) */
                 i1 = i + 1; if (i1 >= dimX) i1 = i-1;
                 i2 = i - 1; if (i2 < 0) i2 = i+1;
@@ -245,7 +245,7 @@ float D3_func(float *A, float *D3, long dimX, long dimY, long dimZ)
                 j2 = j - 1; if (j2 < 0) j2 = j+1;
                 k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
                 k2 = k - 1; if (k2 < 0) k2 = k+1;
-
+                
                 /* Forward-backward differences */
                 NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* x+ */
                 NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* y+ */
@@ -253,7 +253,7 @@ float D3_func(float *A, float *D3, long dimX, long dimY, long dimZ)
                 NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
                 NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
                 /*NOMz_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k2 + (i)*dimY + j]; */ /* z- */
-
+                
                 denom1 = NOMz_1*NOMz_1;
                 denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
                 denom2 = denom2*denom2;
@@ -270,12 +270,12 @@ float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float *lamb
 {
     float dv1, dv2, dv3, lambda_val;
     long index,i,j,k,i1,i2,k1,j1,j2,k2;
-
+    
     if (dimZ > 1) {
 #pragma omp parallel for shared (D1, D2, D3, B, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2, dv1,dv2,dv3,lambda_val)
-        for(j=0; j<dimY; j++) {
-            for(i=0; i<dimX; i++) {
-                for(k=0; k<dimZ; k++) {
+        for(k=0; k<dimZ; k++) {
+            for(j=0; j<dimY; j++) {
+                for(i=0; i<dimX; i++) {
                     index = (dimX*dimY)*k + j*dimX+i;
                     lambda_val = *(lambda + index* lambda_is_arr);
                     /* symmetric boundary conditions (Neuman) */
@@ -285,12 +285,12 @@ float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float *lamb
                     j2 = j - 1; if (j2 < 0) j2 = j+1;
                     k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
                     k2 = k - 1; if (k2 < 0) k2 = k+1;
-
+                    
                     /*divergence components */
                     dv1 = D1[index] - D1[(dimX*dimY)*k + j2*dimX+i];
                     dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2];
                     dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i];
-
+                    
                     B[index] += tau*(lambda_val*(dv1 + dv2 + dv3) - (B[index] - A[index]));
                 }}}
     }
@@ -305,11 +305,11 @@ float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float *lamb
                 i2 = i - 1; if (i2 < 0) i2 = i+1;
                 j1 = j + 1; if (j1 >= dimY) j1 = j-1;
                 j2 = j - 1; if (j2 < 0) j2 = j+1;
-
+                
                 /* divergence components  */
                 dv1 = D1[index] - D1[j2*dimX + i];
                 dv2 = D2[index] - D2[j*dimX + i2];
-
+                
                 B[index] += tau*(lambda_val*(dv1 + dv2) - (B[index] - A[index]));
             }}
     }
diff --git a/src/Core/regularisers_CPU/SB_TV_core.c b/src/Core/regularisers_CPU/SB_TV_core.c
index 8d80787..1215baf 100755
--- a/src/Core/regularisers_CPU/SB_TV_core.c
+++ b/src/Core/regularisers_CPU/SB_TV_core.c
@@ -1,146 +1,146 @@
 /*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #include "SB_TV_core.h"
 
 /* C-OMP implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
-*
-* Input Parameters:
-* 1. Noisy image/volume
-* 2. lambda - regularisation parameter
-* 3. Number of iterations [OPTIONAL parameter]
-* 4. eplsilon - tolerance constant [OPTIONAL parameter]
-* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
-*
-* Output:
-* [1] Filtered/regularized image/volume
-* [2] Information vector which contains [iteration no., reached tolerance]
-*
-* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
-*/
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambda - regularisation parameter
+ * 3. Number of iterations [OPTIONAL parameter]
+ * 4. eplsilon - tolerance constant [OPTIONAL parameter]
+ * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ * [2] Information vector which contains [iteration no., reached tolerance]
+ *
+ * [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
+ */
 
 float SB_TV_CPU_main(float *Input, float *Output, float *infovector, float mu, int iter, float epsil, int methodTV, int dimX, int dimY, int dimZ)
 {
-	  int ll;
+    int ll;
     long j, DimTotal;
-	  float re, re1, lambda;
+    float re, re1, lambda;
     re = 0.0f; re1 = 0.0f;
     int count = 0;
     mu = 1.0f/mu;
     lambda = 2.0f*mu;
-
+    
     float *Output_prev=NULL, *Dx=NULL, *Dy=NULL, *Bx=NULL, *By=NULL;
     DimTotal = (long)(dimX*dimY*dimZ);
     Output_prev = calloc(DimTotal, sizeof(float));
-		Dx = calloc(DimTotal, sizeof(float));
-		Dy = calloc(DimTotal, sizeof(float));
-		Bx = calloc(DimTotal, sizeof(float));
-		By = calloc(DimTotal, sizeof(float));
-
-	if (dimZ == 1) {
-		    /* 2D case */
+    Dx = calloc(DimTotal, sizeof(float));
+    Dy = calloc(DimTotal, sizeof(float));
+    Bx = calloc(DimTotal, sizeof(float));
+    By = calloc(DimTotal, sizeof(float));
+    
+    if (dimZ == 1) {
+        /* 2D case */
         copyIm(Input, Output, (long)(dimX), (long)(dimY), 1l); /*initialize */
-
+        
         /* begin outer SB iterations */
         for(ll=0; ll<iter; ll++) {
-
+            
             /* storing old estimate */
             copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
-
+            
             /* perform two GS iterations (normally 2 is enough for the convergence) */
             gauss_seidel2D(Output, Input, Output_prev, Dx, Dy, Bx, By, (long)(dimX), (long)(dimY), lambda, mu);
             copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
             /*GS iteration */
             gauss_seidel2D(Output, Input, Output_prev, Dx, Dy, Bx, By, (long)(dimX), (long)(dimY), lambda, mu);
-
+            
             /* TV-related step */
             if (methodTV == 1)  updDxDy_shrinkAniso2D(Output, Dx, Dy, Bx, By, (long)(dimX), (long)(dimY), lambda);
             else updDxDy_shrinkIso2D(Output, Dx, Dy, Bx, By, (long)(dimX), (long)(dimY), lambda);
-
+            
             /* update for Bregman variables */
             updBxBy2D(Output, Dx, Dy, Bx, By, (long)(dimX), (long)(dimY));
-
+            
             /* check early stopping criteria if epsilon not equal zero */
             if ((epsil != 0.0f)  && (ll % 5 == 0)) {
-            re = 0.0f; re1 = 0.0f;
-	            for(j=0; j<DimTotal; j++)
-        	    {
-        	        re += powf(Output[j] - Output_prev[j],2);
-        	        re1 += powf(Output[j],2);
-        	    }
-            re = sqrtf(re)/sqrtf(re1);
-            /* stop if the norm residual is less than the tolerance EPS */
-            if (re < epsil)  count++;
-            if (count > 3) break;
+                re = 0.0f; re1 = 0.0f;
+                for(j=0; j<DimTotal; j++)
+                {
+                    re += powf(Output[j] - Output_prev[j],2);
+                    re1 += powf(Output[j],2);
+                }
+                re = sqrtf(re)/sqrtf(re1);
+                /* stop if the norm residual is less than the tolerance EPS */
+                if (re < epsil)  count++;
+                if (count > 3) break;
             }
         }
-	}
-	else {
-		/* 3D case */
-		float *Dz=NULL, *Bz=NULL;
-
-		Dz = calloc(DimTotal, sizeof(float));
-		Bz = calloc(DimTotal, sizeof(float));
-
+    }
+    else {
+        /* 3D case */
+        float *Dz=NULL, *Bz=NULL;
+        
+        Dz = calloc(DimTotal, sizeof(float));
+        Bz = calloc(DimTotal, sizeof(float));
+        
         copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ)); /*initialize */
-
+        
         /* begin outer SB iterations */
         for(ll=0; ll<iter; ll++) {
-
+            
             /* storing old estimate */
             copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-
-             /* perform two GS iterations (normally 2 is enough for the convergence) */
+            
+            /* perform two GS iterations (normally 2 is enough for the convergence) */
             gauss_seidel3D(Output, Input, Output_prev, Dx, Dy, Dz, Bx, By, Bz, (long)(dimX), (long)(dimY), (long)(dimZ), lambda, mu);
             copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
             /*GS iteration */
             gauss_seidel3D(Output, Input, Output_prev, Dx, Dy, Dz, Bx, By, Bz, (long)(dimX), (long)(dimY), (long)(dimZ), lambda, mu);
-
+            
             /* TV-related step */
             if (methodTV == 1)  updDxDyDz_shrinkAniso3D(Output, Dx, Dy, Dz, Bx, By, Bz, (long)(dimX), (long)(dimY), (long)(dimZ), lambda);
             else updDxDyDz_shrinkIso3D(Output, Dx, Dy, Dz, Bx, By, Bz, (long)(dimX), (long)(dimY), (long)(dimZ), lambda);
-
+            
             /* update for Bregman variables */
             updBxByBz3D(Output, Dx, Dy, Dz, Bx, By, Bz, (long)(dimX), (long)(dimY), (long)(dimZ));
-
+            
             /* check early stopping criteria if epsilon not equal zero */
             if ((epsil != 0.0f)  && (ll % 5 == 0)) {
-            re = 0.0f; re1 = 0.0f;
-              for(j=0; j<DimTotal; j++)
-              {
-                  re += powf(Output[j] - Output_prev[j],2);
-                  re1 += powf(Output[j],2);
-              }
-            re = sqrtf(re)/sqrtf(re1);
-            /* stop if the norm residual is less than the tolerance EPS */
-            if (re < epsil)  count++;
-            if (count > 3) break;
+                re = 0.0f; re1 = 0.0f;
+                for(j=0; j<DimTotal; j++)
+                {
+                    re += powf(Output[j] - Output_prev[j],2);
+                    re1 += powf(Output[j],2);
+                }
+                re = sqrtf(re)/sqrtf(re1);
+                /* stop if the norm residual is less than the tolerance EPS */
+                if (re < epsil)  count++;
+                if (count > 3) break;
             }
         }
-		free(Dz); free(Bz);
-	}
-
-  free(Output_prev); free(Dx); free(Dy); free(Bx); free(By);
-  /*adding info into info_vector */
-  infovector[0] = (float)(ll);  /*iterations number (if stopped earlier based on tolerance)*/
-  infovector[1] = re;  /* reached tolerance */
-	return 0;
+        free(Dz); free(Bz);
+    }
+    
+    free(Output_prev); free(Dx); free(Dy); free(Bx); free(By);
+    /*adding info into info_vector */
+    infovector[0] = (float)(ll);  /*iterations number (if stopped earlier based on tolerance)*/
+    infovector[1] = re;  /* reached tolerance */
+    return 0;
 }
 
 /********************************************************************/
@@ -151,18 +151,18 @@ float gauss_seidel2D(float *U, float *A, float *U_prev, float *Dx, float *Dy, fl
     float sum, normConst;
     long i,j,i1,i2,j1,j2,index;
     normConst = 1.0f/(mu + 4.0f*lambda);
-
+    
 #pragma omp parallel for shared(U) private(index,i,j,i1,i2,j1,j2,sum)
-    for(i=0; i<dimX; i++) {
+    for(j=0; j<dimY; j++) {
         /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        for(j=0; j<dimY; j++) {
+        j1 = j+1; if (j1 == dimY) j1 = j-1;
+        j2 = j-1; if (j2 < 0) j2 = j+1;
+        for(i=0; i<dimX; i++) {
             /* symmetric boundary conditions (Neuman) */
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
+            i1 = i+1; if (i1 == dimX) i1 = i-1;
+            i2 = i-1; if (i2 < 0) i2 = i+1;
             index = j*dimX+i;
-
+            
             sum = Dx[j*dimX+i2] - Dx[index] + Dy[j2*dimX+i] - Dy[index] - Bx[j*dimX+i2] + Bx[index] - By[j2*dimX+i] + By[index];
             sum += U_prev[j*dimX+i1] + U_prev[j*dimX+i2] + U_prev[j1*dimX+i] + U_prev[j2*dimX+i];
             sum *= lambda;
@@ -178,22 +178,23 @@ float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By
     float val1, val11, val2, val22, denom_lam;
     denom_lam = 1.0f/lambda;
 #pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,val1,val11,val2,val22)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
+    for(j=0; j<dimY; j++) {
+        j1 = j+1; if (j1 == dimY) j1 = j-1;
+        for(i=0; i<dimX; i++) {
             /* symmetric boundary conditions (Neuman) */
             i1 = i+1; if (i1 == dimX) i1 = i-1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            
             index = j*dimX+i;
-
+            
             val1 = (U[j*dimX+i1] - U[index]) + Bx[index];
             val2 = (U[j1*dimX+i] - U[index]) + By[index];
-
+            
             val11 = fabs(val1) - denom_lam; if (val11 < 0) val11 = 0;
             val22 = fabs(val2) - denom_lam; if (val22 < 0) val22 = 0;
-
+            
             if (val1 !=0) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0;
             if (val2 !=0) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0;
-
+            
         }}
     return 1;
 }
@@ -202,22 +203,22 @@ float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By,
     long i,j,i1,j1,index;
     float val1, val11, val2, denom, denom_lam;
     denom_lam = 1.0f/lambda;
-
+    
 #pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,val1,val11,val2,denom)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
+    for(j=0; j<dimY; j++) {
+        j1 = j+1; if (j1 == dimY) j1 = j-1;
+        for(i=0; i<dimX; i++) {
             /* symmetric boundary conditions (Neuman) */
             i1 = i+1; if (i1 == dimX) i1 = i-1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
             index = j*dimX+i;
-
+            
             val1 = (U[j*dimX+i1] - U[index]) + Bx[index];
             val2 = (U[j1*dimX+i] - U[index]) + By[index];
-
+            
             denom = sqrt(val1*val1 + val2*val2);
-
+            
             val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
-
+            
             if (denom != 0.0f) {
                 Dx[index] = val11*(val1/denom);
                 Dy[index] = val11*(val2/denom);
@@ -233,13 +234,13 @@ float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX,
 {
     long i,j,i1,j1,index;
 #pragma omp parallel for shared(U) private(index,i,j,i1,j1)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
+    for(j=0; j<dimY; j++) {
+        j1 = j+1; if (j1 == dimY) j1 = j-1;
+        for(i=0; i<dimX; i++) {
             /* symmetric boundary conditions (Neuman) */
             i1 = i+1; if (i1 == dimX) i1 = i-1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
             index = j*dimX+i;
-
+            
             Bx[index] += (U[j*dimX+i1] - U[index]) - Dx[index];
             By[index] += (U[j1*dimX+i] - U[index]) - Dy[index];
         }}
@@ -256,18 +257,19 @@ float gauss_seidel3D(float *U, float *A, float *U_prev, float *Dx, float *Dy, fl
     long i,j,i1,i2,j1,j2,k,k1,k2,index;
     normConst = 1.0f/(mu + 6.0f*lambda);
 #pragma omp parallel for shared(U) private(index,i,j,i1,i2,j1,j2,k,k1,k2,d_val,b_val,sum)
-    for(i=0; i<dimX; i++) {
+    for(k=0; k<dimZ; k++) {
+        k1 = k+1; if (k1 == dimZ) k1 = k-1;
+        k2 = k-1; if (k2 < 0) k2 = k+1;
         for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            j2 = j-1; if (j2 < 0) j2 = j+1;
+            for(i=0; i<dimX; i++) {
                 /* symmetric boundary conditions (Neuman) */
                 i1 = i+1; if (i1 == dimX) i1 = i-1;
                 i2 = i-1; if (i2 < 0) i2 = i+1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                j2 = j-1; if (j2 < 0) j2 = j+1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                k2 = k-1; if (k2 < 0) k2 = k+1;
+                
                 index = (dimX*dimY)*k + j*dimX+i;
-
+                
                 d_val = Dx[(dimX*dimY)*k + j*dimX+i2] - Dx[index] + Dy[(dimX*dimY)*k + j2*dimX+i] - Dy[index] + Dz[(dimX*dimY)*k2 + j*dimX+i] - Dz[index];
                 b_val = -Bx[(dimX*dimY)*k + j*dimX+i2] + Bx[index] - By[(dimX*dimY)*k + j2*dimX+i] + By[index] - Bz[(dimX*dimY)*k2 + j*dimX+i] + Bz[index];
                 sum = d_val + b_val;
@@ -285,27 +287,28 @@ float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *
     float val1, val11, val2, val22, val3, val33, denom_lam;
     denom_lam = 1.0f/lambda;
 #pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,k,k1,val1,val11,val2,val22,val3,val33)
-    for(i=0; i<dimX; i++) {
+    for(k=0; k<dimZ; k++) {
+        k1 = k+1; if (k1 == dimZ) k1 = k-1;
         for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            for(i=0; i<dimX; i++) {
+                
                 index = (dimX*dimY)*k + j*dimX+i;
                 /* symmetric boundary conditions (Neuman) */
                 i1 = i+1; if (i1 == dimX) i1 = i-1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-
+                
                 val1 = (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) + Bx[index];
                 val2 = (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) + By[index];
                 val3 = (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) + Bz[index];
-
+                
                 val11 = fabs(val1) - denom_lam; if (val11 < 0.0f) val11 = 0.0f;
                 val22 = fabs(val2) - denom_lam; if (val22 < 0.0f) val22 = 0.0f;
                 val33 = fabs(val3) - denom_lam; if (val33 < 0.0f) val33 = 0.0f;
-
+                
                 if (val1 !=0.0f) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0.0f;
                 if (val2 !=0.0f) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0.0f;
                 if (val3 !=0.0f) Dz[index] = (val3/fabs(val3))*val33; else Dz[index] = 0.0f;
-
+                
             }}}
     return 1;
 }
@@ -315,23 +318,25 @@ float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx
     float val1, val11, val2, val3, denom, denom_lam;
     denom_lam = 1.0f/lambda;
 #pragma omp parallel for shared(U,denom_lam) private(index,denom,i,j,i1,j1,k,k1,val1,val11,val2,val3)
-    for(i=0; i<dimX; i++) {
+    for(k=0; k<dimZ; k++) {
+        k1 = k+1; if (k1 == dimZ) k1 = k-1;
         for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                index = (dimX*dimY)*k + j*dimX+i;
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            for(i=0; i<dimX; i++) {          
+                
                 /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-
+                i1 = i+1; if (i1 == dimX) i1 = i-1;                
+                
+                index = (dimX*dimY)*k + j*dimX+i;
+                
                 val1 = (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) + Bx[index];
                 val2 = (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) + By[index];
                 val3 = (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) + Bz[index];
-
+                
                 denom = sqrt(val1*val1 + val2*val2 + val3*val3);
-
+                
                 val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
-
+                
                 if (denom != 0.0f) {
                     Dx[index] = val11*(val1/denom);
                     Dy[index] = val11*(val2/denom);
@@ -349,15 +354,15 @@ float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *B
 {
     long i,j,k,i1,j1,k1,index;
 #pragma omp parallel for shared(U) private(index,i,j,k,i1,j1,k1)
-    for(i=0; i<dimX; i++) {
+    for(k=0; k<dimZ; k++) {
+        k1 = k+1; if (k1 == dimZ) k1 = k-1;
         for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-				index = (dimX*dimY)*k + j*dimX+i;
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            for(i=0; i<dimX; i++) {
+                index = (dimX*dimY)*k + j*dimX+i;
                 /* symmetric boundary conditions (Neuman) */
                 i1 = i+1; if (i1 == dimX) i1 = i-1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-
+                
                 Bx[index] += (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) - Dx[index];
                 By[index] += (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) - Dy[index];
                 Bz[index] += (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) - Dz[index];
diff --git a/src/Core/regularisers_CPU/TGV_core.c b/src/Core/regularisers_CPU/TGV_core.c
index f43b56a..3f917de 100644
--- a/src/Core/regularisers_CPU/TGV_core.c
+++ b/src/Core/regularisers_CPU/TGV_core.c
@@ -48,148 +48,148 @@ float TGV_main(float *U0, float *U, float *infovector, float lambda, float alpha
     re = 0.0f; re1 = 0.0f;
     int count = 0;
     float *U_old, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma;
-
+    
     DimTotal = (long)(dimX*dimY*dimZ);
     copyIm(U0, U, (long)(dimX), (long)(dimY), (long)(dimZ)); /* initialize */
     tau = pow(L2,-0.5);
     sigma = pow(L2,-0.5);
-
+    
     /* dual variables */
     P1 = calloc(DimTotal, sizeof(float));
     P2 = calloc(DimTotal, sizeof(float));
-
+    
     Q1 = calloc(DimTotal, sizeof(float));
     Q2 = calloc(DimTotal, sizeof(float));
     Q3 = calloc(DimTotal, sizeof(float));
-
+    
     U_old = calloc(DimTotal, sizeof(float));
-
+    
     V1 = calloc(DimTotal, sizeof(float));
     V1_old = calloc(DimTotal, sizeof(float));
     V2 = calloc(DimTotal, sizeof(float));
     V2_old = calloc(DimTotal, sizeof(float));
-
+    
     if (dimZ == 1) {
         /*2D case*/
-
+        
         /* Primal-dual iterations begin here */
         for(ll = 0; ll < iter; ll++) {
-
+            
             /* Calculate Dual Variable P */
             DualP_2D(U, V1, V2, P1, P2, (long)(dimX), (long)(dimY), sigma);
-
+            
             /*Projection onto convex set for P*/
             ProjP_2D(P1, P2, (long)(dimX), (long)(dimY), alpha1);
-
+            
             /* Calculate Dual Variable Q */
             DualQ_2D(V1, V2, Q1, Q2, Q3, (long)(dimX), (long)(dimY), sigma);
-
+            
             /*Projection onto convex set for Q*/
             ProjQ_2D(Q1, Q2, Q3, (long)(dimX), (long)(dimY), alpha0);
-
+            
             /*saving U into U_old*/
             copyIm(U, U_old, (long)(dimX), (long)(dimY), 1l);
-
+            
             /*adjoint operation  -> divergence and projection of P*/
             DivProjP_2D(U, U0, P1, P2, (long)(dimX), (long)(dimY), lambda, tau);
-
+            
             /*get updated solution U*/
             newU(U, U_old, (long)(dimX), (long)(dimY));
-
+            
             /*saving V into V_old*/
             copyIm(V1, V1_old, (long)(dimX), (long)(dimY), 1l);
             copyIm(V2, V2_old, (long)(dimX), (long)(dimY), 1l);
-
+            
             /* upd V*/
             UpdV_2D(V1, V2, P1, P2, Q1, Q2, Q3, (long)(dimX), (long)(dimY), tau);
-
+            
             /*get new V*/
             newU(V1, V1_old, (long)(dimX), (long)(dimY));
             newU(V2, V2_old, (long)(dimX), (long)(dimY));
-
+            
             /* check early stopping criteria */
             if ((epsil != 0.0f)  && (ll % 5 == 0)) {
-            re = 0.0f; re1 = 0.0f;
-	            for(j=0; j<DimTotal; j++)
-        	    {
-        	        re += powf(U[j] - U_old[j],2);
-        	        re1 += powf(U[j],2);
-        	    }
-           re = sqrtf(re)/sqrtf(re1);
-           if (re < epsil)  count++;
-           if (count > 3) break;
-           }
+                re = 0.0f; re1 = 0.0f;
+                for(j=0; j<DimTotal; j++)
+                {
+                    re += powf(U[j] - U_old[j],2);
+                    re1 += powf(U[j],2);
+                }
+                re = sqrtf(re)/sqrtf(re1);
+                if (re < epsil)  count++;
+                if (count > 3) break;
+            }
         } /*end of iterations*/
     }
     else {
         /*3D case*/
         float *P3, *Q4, *Q5, *Q6, *V3, *V3_old;
-
+        
         P3 = calloc(DimTotal, sizeof(float));
         Q4 = calloc(DimTotal, sizeof(float));
         Q5 = calloc(DimTotal, sizeof(float));
         Q6 = calloc(DimTotal, sizeof(float));
         V3 = calloc(DimTotal, sizeof(float));
         V3_old = calloc(DimTotal, sizeof(float));
-
+        
         /* Primal-dual iterations begin here */
         for(ll = 0; ll < iter; ll++) {
-
+            
             /* Calculate Dual Variable P */
             DualP_3D(U, V1, V2, V3, P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), sigma);
-
+            
             /*Projection onto convex set for P*/
             ProjP_3D(P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), alpha1);
-
+            
             /* Calculate Dual Variable Q */
             DualQ_3D(V1, V2, V3, Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), sigma);
-
+            
             /*Projection onto convex set for Q*/
             ProjQ_3D(Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), alpha0);
-
+            
             /*saving U into U_old*/
             copyIm(U, U_old, (long)(dimX), (long)(dimY), (long)(dimZ));
-
+            
             /*adjoint operation  -> divergence and projection of P*/
             DivProjP_3D(U, U0, P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), lambda, tau);
-
+            
             /*get updated solution U*/
             newU3D(U, U_old, (long)(dimX), (long)(dimY), (long)(dimZ));
-
+            
             /*saving V into V_old*/
             copyIm_3Ar(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ));
-
+            
             /* upd V*/
             UpdV_3D(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), tau);
-
+            
             /*get new V*/
             newU3D_3Ar(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ));
-
+            
             /* check early stopping criteria */
             if ((epsil != 0.0f)  && (ll % 5 == 0)) {
-            re = 0.0f; re1 = 0.0f;
-	            for(j=0; j<DimTotal; j++)
-        	    {
-        	        re += powf(U[j] - U_old[j],2);
-        	        re1 += powf(U[j],2);
-        	    }
-           re = sqrtf(re)/sqrtf(re1);
-           if (re < epsil)  count++;
-           if (count > 3) break;
-           }
-
+                re = 0.0f; re1 = 0.0f;
+                for(j=0; j<DimTotal; j++)
+                {
+                    re += powf(U[j] - U_old[j],2);
+                    re1 += powf(U[j],2);
+                }
+                re = sqrtf(re)/sqrtf(re1);
+                if (re < epsil)  count++;
+                if (count > 3) break;
+            }
+            
         } /*end of iterations*/
         free(P3);free(Q4);free(Q5);free(Q6);free(V3);free(V3_old);
     }
-
+    
     /*freeing*/
     free(P1);free(P2);free(Q1);free(Q2);free(Q3);free(U_old);
     free(V1);free(V2);free(V1_old);free(V2_old);
-
+    
     /*adding info into info_vector */
     infovector[0] = (float)(ll);  /*iterations number (if stopped earlier based on tolerance)*/
     infovector[1] = re;  /* reached tolerance */
-
+    
     return 0;
 }
 
@@ -201,15 +201,15 @@ float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, long dimX,
 {
     long i,j, index;
 #pragma omp parallel for shared(U,V1,V2,P1,P2) private(i,j,index)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
+    for(j=0; j<dimY; j++) {
+        for(i=0; i<dimX; i++) {
             index = j*dimX+i;
             /* symmetric boundary conditions (Neuman) */
             if (i == dimX-1) P1[index] += sigma*(-V1[index]);
             else P1[index] += sigma*((U[j*dimX+(i+1)] - U[index])  - V1[index]);
             if (j == dimY-1) P2[index] += sigma*(-V2[index]);
             else  P2[index] += sigma*((U[(j+1)*dimX+i] - U[index])  - V2[index]);
-
+            
         }}
     return 1;
 }
@@ -219,8 +219,8 @@ float ProjP_2D(float *P1, float *P2, long dimX, long dimY, float alpha1)
     float grad_magn;
     long i,j,index;
 #pragma omp parallel for shared(P1,P2) private(i,j,index,grad_magn)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
+    for(j=0; j<dimY; j++) {
+        for(i=0; i<dimX; i++) {
             index = j*dimX+i;
             grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2)))/alpha1;
             if (grad_magn > 1.0f) {
@@ -236,8 +236,8 @@ float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, long dimX,
     long i,j,index;
     float q1, q2, q11, q22;
 #pragma omp parallel for shared(Q1,Q2,Q3,V1,V2) private(i,j,index,q1,q2,q11,q22)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
+    for(j=0; j<dimY; j++) {
+        for(i=0; i<dimX; i++) {
             index = j*dimX+i;
             q1 = 0.0f; q11 = 0.0f; q2 = 0.0f; q22 = 0.0f;
             /* boundary conditions (Neuman) */
@@ -260,8 +260,8 @@ float ProjQ_2D(float *Q1, float *Q2, float *Q3, long dimX, long dimY, float alph
     float grad_magn;
     long i,j,index;
 #pragma omp parallel for shared(Q1,Q2,Q3) private(i,j,index,grad_magn)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
+    for(j=0; j<dimY; j++) {
+        for(i=0; i<dimX; i++) {
             index = j*dimX+i;
             grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2));
             grad_magn = grad_magn/alpha0;
@@ -279,18 +279,18 @@ float DivProjP_2D(float *U, float *U0, float *P1, float *P2, long dimX, long dim
     long i,j,index;
     float P_v1, P_v2, div;
 #pragma omp parallel for shared(U,U0,P1,P2) private(i,j,index,P_v1,P_v2,div)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
+    for(j=0; j<dimY; j++) {
+        for(i=0; i<dimX; i++) {
             index = j*dimX+i;
-
+            
             if (i == 0) P_v1 = P1[index];
             else if (i == dimX-1) P_v1 = -P1[j*dimX+(i-1)];
             else P_v1 = P1[index] - P1[j*dimX+(i-1)];
-
+            
             if (j == 0) P_v2 = P2[index];
             else if (j == dimY-1) P_v2 = -P2[(j-1)*dimX+i];
             else P_v2 = P2[index] - P2[(j-1)*dimX+i];
-
+            
             div = P_v1 + P_v2;
             U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
         }}
@@ -310,10 +310,10 @@ float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2,
     long i, j, index;
     float q1, q3_x, q3_y, q2, div1, div2;
 #pragma omp parallel for shared(V1,V2,P1,P2,Q1,Q2,Q3) private(i, j, index, q1, q3_x, q3_y, q2, div1, div2)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
+    for(j=0; j<dimY; j++) {
+        for(i=0; i<dimX; i++) {
             index = j*dimX+i;
-
+            
             /* boundary conditions (Neuman) */
             if (i == 0) {
                 q1 = Q1[index];
@@ -324,7 +324,7 @@ float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2,
             else {
                 q1 = Q1[index] - Q1[j*dimX+(i-1)];
                 q3_x = Q3[index] - Q3[j*dimX+(i-1)];  }
-
+            
             if (j == 0) {
                 q2 = Q2[index];
                 q3_y = Q3[index]; }
@@ -334,8 +334,8 @@ float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2,
             else {
                 q2 = Q2[index] - Q2[(j-1)*dimX+i];
                 q3_y = Q3[index] - Q3[(j-1)*dimX+i]; }
-
-
+            
+            
             div1 = q1 + q3_y;
             div2 = q3_x + q2;
             V1[index] += tau*(P1[index] + div1);
@@ -352,9 +352,9 @@ float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2,
 {
     long i,j,k, index;
 #pragma omp parallel for shared(U,V1,V2,V3,P1,P2,P3) private(i,j,k,index)
-    for(i=0; i<dimX; i++) {
+    for(k=0; k<dimZ; k++) {
         for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
+            for(i=0; i<dimX; i++) {
                 index = (dimX*dimY)*k + j*dimX+i;
                 /* symmetric boundary conditions (Neuman) */
                 if (i == dimX-1) P1[index] += sigma*(-V1[index]);
@@ -372,9 +372,9 @@ float ProjP_3D(float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ,
     float grad_magn;
     long i,j,k,index;
 #pragma omp parallel for shared(P1,P2,P3) private(i,j,k,index,grad_magn)
-    for(i=0; i<dimX; i++) {
+    for(k=0; k<dimZ; k++) {
         for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
+            for(i=0; i<dimX; i++) {
                 index = (dimX*dimY)*k + j*dimX+i;
                 grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2)))/alpha1;
                 if (grad_magn > 1.0f) {
@@ -391,9 +391,9 @@ float DualQ_3D(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3,
     long i,j,k,index;
     float q1, q2, q3, q11, q22, q33, q44, q55, q66;
 #pragma omp parallel for shared(Q1,Q2,Q3,Q4,Q5,Q6,V1,V2,V3) private(i,j,k,index,q1,q2,q3,q11,q22,q33,q44,q55,q66)
-    for(i=0; i<dimX; i++) {
+    for(k=0; k<dimZ; k++) {
         for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
+            for(i=0; i<dimX; i++) {
                 index = (dimX*dimY)*k + j*dimX+i;
                 q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f;
                 /* symmetric boundary conditions (Neuman) */
@@ -412,7 +412,7 @@ float DualQ_3D(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3,
                     q44 = V1[(dimX*dimY)*(k+1) + j*dimX+i] - V1[index];
                     q66 = V2[(dimX*dimY)*(k+1) + j*dimX+i] - V2[index];
                 }
-
+                
                 Q1[index] += sigma*(q1); /*Q11*/
                 Q2[index] += sigma*(q2); /*Q22*/
                 Q3[index] += sigma*(q3); /*Q33*/
@@ -427,9 +427,9 @@ float ProjQ_3D(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6,
     float grad_magn;
     long i,j,k,index;
 #pragma omp parallel for shared(Q1,Q2,Q3,Q4,Q5,Q6) private(i,j,k,index,grad_magn)
-    for(i=0; i<dimX; i++) {
+    for(k=0; k<dimZ; k++) {
         for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
+            for(i=0; i<dimX; i++) {
                 index = (dimX*dimY)*k + j*dimX+i;
                 grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + pow(Q3[index],2) + 2.0f*pow(Q4[index],2) + 2.0f*pow(Q5[index],2) + 2.0f*pow(Q6[index],2));
                 grad_magn = grad_magn/alpha0;
@@ -450,11 +450,11 @@ float DivProjP_3D(float *U, float *U0, float *P1, float *P2, float *P3, long dim
     long i,j,k,index;
     float P_v1, P_v2, P_v3, div;
 #pragma omp parallel for shared(U,U0,P1,P2,P3) private(i,j,k,index,P_v1,P_v2,P_v3,div)
-    for(i=0; i<dimX; i++) {
+    for(k=0; k<dimZ; k++) {
         for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
+            for(i=0; i<dimX; i++) {
                 index = (dimX*dimY)*k + j*dimX+i;
-
+                
                 if (i == 0) P_v1 = P1[index];
                 else if (i == dimX-1)  P_v1 = -P1[(dimX*dimY)*k + j*dimX+(i-1)];
                 else P_v1 = P1[index] - P1[(dimX*dimY)*k + j*dimX+(i-1)];
@@ -464,7 +464,7 @@ float DivProjP_3D(float *U, float *U0, float *P1, float *P2, float *P3, long dim
                 if (k == 0) P_v3 = P3[index];
                 else if (k == dimZ-1) P_v3 = -P3[(dimX*dimY)*(k-1) + (j)*dimX+i];
                 else P_v3 = P3[index] - P3[(dimX*dimY)*(k-1) + (j)*dimX+i];
-
+                
                 div = P_v1 + P_v2 + P_v3;
                 U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
             }}}
@@ -476,14 +476,14 @@ float UpdV_3D(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3,
     long i,j,k,index;
     float q1, q4x, q5x, q2, q4y, q6y, q6z, q5z, q3, div1, div2, div3;
 #pragma omp parallel for shared(V1,V2,V3,P1,P2,P3,Q1,Q2,Q3,Q4,Q5,Q6) private(i,j,k,index,q1,q4x,q5x,q2,q4y,q6y,q6z,q5z,q3,div1,div2,div3)
-    for(i=0; i<dimX; i++) {
+    for(k=0; k<dimZ; k++) {
         for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
+            for(i=0; i<dimX; i++) {
                 index = (dimX*dimY)*k + j*dimX+i;
                 q1 = 0.0f; q4x= 0.0f; q5x= 0.0f; q2= 0.0f; q4y= 0.0f; q6y= 0.0f; q6z= 0.0f; q5z= 0.0f; q3= 0.0f;
                 /* Q1 - Q11, Q2 - Q22, Q3 -  Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/
                 /* symmetric boundary conditions (Neuman) */
-
+                
                 if (i == 0) {
                     q1 = Q1[index];
                     q4x = Q4[index];
@@ -520,11 +520,11 @@ float UpdV_3D(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3,
                     q6z = Q6[index] - Q6[(dimX*dimY)*(k-1) + (j)*dimX+i];
                     q5z = Q5[index] - Q5[(dimX*dimY)*(k-1) + (j)*dimX+i];
                     q3 = Q3[index] - Q3[(dimX*dimY)*(k-1) + (j)*dimX+i]; }
-
+                
                 div1 = q1 + q4y + q5z;
                 div2 = q4x + q2 + q6z;
                 div3 = q5x + q6y + q3;
-
+                
                 V1[index] += tau*(P1[index] + div1);
                 V2[index] += tau*(P2[index] + div2);
                 V3[index] += tau*(P3[index] + div3);