21 files changed, 1732 insertions, 111 deletions
diff --git a/Core/CMakeLists.txt b/Core/CMakeLists.txt
index 26912b9..fecdeb1 100644
--- a/Core/CMakeLists.txt
+++ b/Core/CMakeLists.txt
@@ -84,9 +84,9 @@ message("Adding regularisers as a shared library")
 #set(CMAKE_C_FLAGS "-acc -Minfo -ta=multicore -openmp -fPIC")
 add_library(cilreg SHARED
 	    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/FGP_TV_core.c
+	    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/SB_TV_core.c
 	    #${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/LLT_model_core.c
 	    #${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/PatchBased_Regul_core.c
-	    #${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/SplitBregman_TV_core.c
 	    #${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/TGV_PD_core.c
         ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/ROF_TV_core.c
         ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/FGP_dTV_core.c
@@ -130,6 +130,7 @@ if (CUDA_FOUND)
   CUDA_ADD_LIBRARY(cilregcuda SHARED
 	${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_ROF_GPU_core.cu
 	${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_FGP_GPU_core.cu
+	${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_SB_GPU_core.cu
 	${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/dTV_FGP_GPU_core.cu
   )
   if (UNIX)
diff --git a/Core/regularisers_CPU/SB_TV_core.c b/Core/regularisers_CPU/SB_TV_core.c
new file mode 100755
index 0000000..93b4c2c
--- /dev/null
+++ b/Core/regularisers_CPU/SB_TV_core.c
@@ -0,0 +1,368 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "SB_TV_core.h"
+
+/* C-OMP implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularisation parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. eplsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+* 6. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
+*
+* Output:
+* 1. Filtered/regularized image
+*
+* This function is based on the Matlab's code and paper by
+* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
+*/
+ 
+float SB_TV_CPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ)
+{
+	int ll, j, DimTotal;
+	float re, re1, lambda;
+    int count = 0;
+    mu = 1.0f/mu;
+	lambda = 2.0f*mu;
+
+	if (dimZ <= 1) {
+		/* 2D case */
+		float *Output_prev=NULL, *Dx=NULL, *Dy=NULL, *Bx=NULL, *By=NULL;
+		DimTotal = dimX*dimY;
+		
+		Output_prev = calloc(DimTotal, sizeof(float));
+		Dx = calloc(DimTotal, sizeof(float));
+		Dy = calloc(DimTotal, sizeof(float));
+		Bx = calloc(DimTotal, sizeof(float));
+		By = calloc(DimTotal, sizeof(float));
+        
+        copyIm(Input, Output, dimX, dimY, 1); /*initialize */
+        
+        /* begin outer SB iterations */
+        for(ll=0; ll<iter; ll++) {
+            
+            /* storing old estimate */
+            copyIm(Output, Output_prev, dimX, dimY, 1);
+            
+            /* perform two GS iterations (normally 2 is enough for the convergence) */
+            gauss_seidel2D(Output, Input, Output_prev, Dx, Dy, Bx, By, dimX, dimY, lambda, mu);
+            copyIm(Output, Output_prev, dimX, dimY, 1);
+            /*GS iteration */
+            gauss_seidel2D(Output, Input, Output_prev, Dx, Dy, Bx, By, dimX, dimY, lambda, mu);
+            
+            /* TV-related step */
+            if (methodTV == 1)  updDxDy_shrinkAniso2D(Output, Dx, Dy, Bx, By, dimX, dimY, lambda);
+            else updDxDy_shrinkIso2D(Output, Dx, Dy, Bx, By, dimX, dimY, lambda);
+            
+            /* update for Bregman variables */
+            updBxBy2D(Output, Dx, Dy, Bx, By, dimX, dimY);
+            
+            /* check early stopping criteria if epsilon not equal zero */
+            if (epsil != 0) {
+            re = 0.0f; re1 = 0.0f;
+				for(j=0; j<DimTotal; j++) {
+                re += pow(Output[j] - Output_prev[j],2);
+                re1 += pow(Output[j],2);
+				}
+            re = sqrt(re)/sqrt(re1);
+            if (re < epsil)  count++;
+				if (count > 4) break;
+			}
+            /*printf("%f %i %i \n", re, ll, count); */
+        }
+        if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll);
+		free(Output_prev); free(Dx); free(Dy); free(Bx); free(By);
+	}
+	else {
+		/* 3D case */
+		float *Output_prev=NULL, *Dx=NULL, *Dy=NULL, *Dz=NULL, *Bx=NULL, *By=NULL, *Bz=NULL;
+		DimTotal = dimX*dimY*dimZ;
+		
+		Output_prev = calloc(DimTotal, sizeof(float));
+		Dx = calloc(DimTotal, sizeof(float));
+		Dy = calloc(DimTotal, sizeof(float));
+		Dz = calloc(DimTotal, sizeof(float));
+		Bx = calloc(DimTotal, sizeof(float));
+		By = calloc(DimTotal, sizeof(float));
+		Bz = calloc(DimTotal, sizeof(float));
+        
+        copyIm(Input, Output, dimX, dimY, dimZ); /*initialize */
+        
+        /* begin outer SB iterations */
+        for(ll=0; ll<iter; ll++) {
+            
+            /* storing old estimate */
+            copyIm(Output, Output_prev, dimX, dimY, dimZ);
+            
+             /* perform two GS iterations (normally 2 is enough for the convergence) */
+            gauss_seidel3D(Output, Input, Output_prev, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda, mu);
+            copyIm(Output, Output_prev, dimX, dimY, dimZ);
+            /*GS iteration */
+            gauss_seidel3D(Output, Input, Output_prev, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda, mu);
+            
+            /* TV-related step */
+            if (methodTV == 1)  updDxDyDz_shrinkAniso3D(Output, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda);
+            else updDxDyDz_shrinkIso3D(Output, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda);
+            
+            /* update for Bregman variables */
+            updBxByBz3D(Output, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ);
+            
+            /* check early stopping criteria if epsilon not equal zero */
+            if (epsil != 0) {
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<DimTotal; j++) {
+                re += pow(Output[j] - Output_prev[j],2);
+                re1 += pow(Output[j],2);
+				}
+            re = sqrt(re)/sqrt(re1);
+            if (re < epsil)  count++;
+				if (count > 4) break;
+			}
+            /*printf("%f %i %i \n", re, ll, count); */
+        }
+        if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll);
+		free(Output_prev); free(Dx); free(Dy); free(Dz); free(Bx); free(By); free(Bz);
+	}
+	return *Output;
+}
+
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+float gauss_seidel2D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu)
+{
+    float sum, normConst;
+    int i,j,i1,i2,j1,j2,index;
+    normConst = 1.0f/(mu + 4.0f*lambda);
+    
+#pragma omp parallel for shared(U) private(index,i,j,i1,i2,j1,j2,sum)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions (Neuman) */
+        i1 = i+1; if (i1 == dimX) i1 = i-1;
+        i2 = i-1; if (i2 < 0) i2 = i+1;
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            j2 = j-1; if (j2 < 0) j2 = j+1;
+            index = j*dimX+i;
+            
+            sum = Dx[j*dimX+i2] - Dx[index] + Dy[j2*dimX+i] - Dy[index] - Bx[j*dimX+i2] + Bx[index] - By[j2*dimX+i] + By[index];
+            sum += U_prev[j*dimX+i1] + U_prev[j*dimX+i2] + U_prev[j1*dimX+i] + U_prev[j2*dimX+i];
+            sum *= lambda;
+            sum += mu*A[index];
+            U[index] = normConst*sum;
+        }}
+    return *U;
+}
+
+float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda)
+{
+    int i,j,i1,j1,index;
+    float val1, val11, val2, val22, denom_lam;
+    denom_lam = 1.0f/lambda;
+#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,val1,val11,val2,val22)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            i1 = i+1; if (i1 == dimX) i1 = i-1;
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            index = j*dimX+i;
+            
+            val1 = (U[j*dimX+i1] - U[index]) + Bx[index];
+            val2 = (U[j1*dimX+i] - U[index]) + By[index];
+            
+            val11 = fabs(val1) - denom_lam; if (val11 < 0) val11 = 0;
+            val22 = fabs(val2) - denom_lam; if (val22 < 0) val22 = 0;
+            
+            if (val1 !=0) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0;
+            if (val2 !=0) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0;
+            
+        }}
+    return 1;
+}
+float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda)
+{
+    int i,j,i1,j1,index;
+    float val1, val11, val2, denom, denom_lam;
+    denom_lam = 1.0f/lambda;
+    
+#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,val1,val11,val2,denom)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            i1 = i+1; if (i1 == dimX) i1 = i-1;
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            index = j*dimX+i;
+            
+            val1 = (U[j*dimX+i1] - U[index]) + Bx[index];
+            val2 = (U[j1*dimX+i] - U[index]) + By[index];
+            
+            denom = sqrt(val1*val1 + val2*val2);
+            
+            val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
+            
+            if (denom != 0.0f) {
+                Dx[index] = val11*(val1/denom);
+                Dy[index] = val11*(val2/denom);
+            }
+            else {
+                Dx[index] = 0;
+                Dy[index] = 0;
+            }
+        }}
+    return 1;
+}
+float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY)
+{
+    int i,j,i1,j1,index;
+#pragma omp parallel for shared(U) private(index,i,j,i1,j1)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            i1 = i+1; if (i1 == dimX) i1 = i-1;
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            index = j*dimX+i;
+            
+            Bx[index] += (U[j*dimX+i1] - U[index]) - Dx[index];
+            By[index] += (U[j1*dimX+i] - U[index]) - Dy[index];
+        }}
+    return 1;
+}
+
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+/*****************************************************************/
+float gauss_seidel3D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu)
+{
+    float normConst, d_val, b_val, sum;
+    int i,j,i1,i2,j1,j2,k,k1,k2,index;
+    normConst = 1.0f/(mu + 6.0f*lambda);
+#pragma omp parallel for shared(U) private(index,i,j,i1,i2,j1,j2,k,k1,k2,d_val,b_val,sum)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                /* symmetric boundary conditions (Neuman) */
+                i1 = i+1; if (i1 == dimX) i1 = i-1;
+                i2 = i-1; if (i2 < 0) i2 = i+1;
+                j1 = j+1; if (j1 == dimY) j1 = j-1;
+                j2 = j-1; if (j2 < 0) j2 = j+1;
+                k1 = k+1; if (k1 == dimZ) k1 = k-1;
+                k2 = k-1; if (k2 < 0) k2 = k+1;
+                index = (dimX*dimY)*k + j*dimX+i;
+                
+                d_val = Dx[(dimX*dimY)*k + j*dimX+i2] - Dx[index] + Dy[(dimX*dimY)*k + j2*dimX+i] - Dy[index] + Dz[(dimX*dimY)*k2 + j*dimX+i] - Dz[index];
+                b_val = -Bx[(dimX*dimY)*k + j*dimX+i2] + Bx[index] - By[(dimX*dimY)*k + j2*dimX+i] + By[index] - Bz[(dimX*dimY)*k2 + j*dimX+i] + Bz[index];
+                sum = d_val + b_val;
+                sum += U_prev[(dimX*dimY)*k + j*dimX+i1] + U_prev[(dimX*dimY)*k + j*dimX+i2] + U_prev[(dimX*dimY)*k + j1*dimX+i] + U_prev[(dimX*dimY)*k + j2*dimX+i] + U_prev[(dimX*dimY)*k1 + j*dimX+i] + U_prev[(dimX*dimY)*k2 + j*dimX+i];
+                sum *= lambda;
+                sum += mu*A[index];
+                U[index] = normConst*sum;
+            }}}
+    return *U;
+}
+
+float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda)
+{
+    int i,j,i1,j1,k,k1,index;
+    float val1, val11, val2, val22, val3, val33, denom_lam;
+    denom_lam = 1.0f/lambda;
+#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,k,k1,val1,val11,val2,val22,val3,val33)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + j*dimX+i;
+                /* symmetric boundary conditions (Neuman) */
+                i1 = i+1; if (i1 == dimX) i1 = i-1;
+                j1 = j+1; if (j1 == dimY) j1 = j-1;
+                k1 = k+1; if (k1 == dimZ) k1 = k-1;
+                
+                val1 = (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) + Bx[index];
+                val2 = (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) + By[index];
+                val3 = (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) + Bz[index];
+                
+                val11 = fabs(val1) - denom_lam; if (val11 < 0.0f) val11 = 0.0f;
+                val22 = fabs(val2) - denom_lam; if (val22 < 0.0f) val22 = 0.0f;
+                val33 = fabs(val3) - denom_lam; if (val33 < 0.0f) val33 = 0.0f;
+                
+                if (val1 !=0.0f) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0.0f;
+                if (val2 !=0.0f) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0.0f;
+                if (val3 !=0.0f) Dz[index] = (val3/fabs(val3))*val33; else Dz[index] = 0.0f;
+                
+            }}}
+    return 1;
+}
+float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda)
+{
+    int i,j,i1,j1,k,k1,index;
+    float val1, val11, val2, val3, denom, denom_lam;
+    denom_lam = 1.0f/lambda;
+#pragma omp parallel for shared(U,denom_lam) private(index,denom,i,j,i1,j1,k,k1,val1,val11,val2,val3)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + j*dimX+i;
+                /* symmetric boundary conditions (Neuman) */
+                i1 = i+1; if (i1 == dimX) i1 = i-1;
+                j1 = j+1; if (j1 == dimY) j1 = j-1;
+                k1 = k+1; if (k1 == dimZ) k1 = k-1;
+                
+                val1 = (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) + Bx[index];
+                val2 = (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) + By[index];
+                val3 = (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) + Bz[index];
+                
+                denom = sqrt(val1*val1 + val2*val2 + val3*val3);
+                
+                val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
+                
+                if (denom != 0.0f) {
+                    Dx[index] = val11*(val1/denom);
+                    Dy[index] = val11*(val2/denom);
+                    Dz[index] = val11*(val3/denom);
+                }
+                else {
+                    Dx[index] = 0;
+                    Dy[index] = 0;
+                    Dz[index] = 0;
+                }
+            }}}
+    return 1;
+}
+float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ)
+{
+    int i,j,k,i1,j1,k1,index;
+#pragma omp parallel for shared(U) private(index,i,j,k,i1,j1,k1)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+				index = (dimX*dimY)*k + j*dimX+i;
+                /* symmetric boundary conditions (Neuman) */
+                i1 = i+1; if (i1 == dimX) i1 = i-1;
+                j1 = j+1; if (j1 == dimY) j1 = j-1;
+                k1 = k+1; if (k1 == dimZ) k1 = k-1;
+                
+                Bx[index] += (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) - Dx[index];
+                By[index] += (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) - Dy[index];
+                Bz[index] += (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) - Dz[index];
+            }}}
+    return 1;
+}
diff --git a/Core/regularisers_CPU/SB_TV_core.h b/Core/regularisers_CPU/SB_TV_core.h
new file mode 100755
index 0000000..791d951
--- /dev/null
+++ b/Core/regularisers_CPU/SB_TV_core.h
@@ -0,0 +1,37 @@
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+float SB_TV_CPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
+
+float gauss_seidel2D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu);
+float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
+float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
+float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY);
+
+float gauss_seidel3D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu);
+float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
+float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
+float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ);
diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/Core/regularisers_GPU/TV_FGP_GPU_core.cu
index 3fbbcde..c0ae890 100755
--- a/Core/regularisers_GPU/TV_FGP_GPU_core.cu
+++ b/Core/regularisers_GPU/TV_FGP_GPU_core.cu
@@ -173,28 +173,6 @@ __global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total)
         if (Output[index] < 0.0f) Output[index] = 0.0f;
     }
 }
-__global__ void copy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
-{
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
-    
-    if (index < num_total)	{
-        Output[index] = Input[index];
-    }
-}
-__global__ void ResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
-{
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
-    
-    if (index < num_total)	{
-        Output[index] = Input1[index] - Input2[index];
-    }
-}   
 /************************************************/
 /*****************3D modules*********************/
 /************************************************/
@@ -294,8 +272,6 @@ __global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N,
     }
     return;
 }
-
-
 __global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize)
 {
     //calculate each thread global index
@@ -325,8 +301,19 @@ __global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_tota
         if (Output[index] < 0.0f) Output[index] = 0.0f;
     }
 }
+__global__ void FGPcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
+{
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    
+    int index = xIndex + N*yIndex;
+    
+    if (index < num_total)	{
+        Output[index] = Input[index];
+    }
+}
 
-__global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
+__global__ void FGPcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
 {
 	int i = blockDim.x * blockIdx.x + threadIdx.x;
     int j = blockDim.y * blockIdx.y + threadIdx.y;
@@ -338,6 +325,32 @@ __global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z,
         Output[index] = Input[index];
     }
 }
+
+__global__ void FGPResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
+{
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    
+    int index = xIndex + N*yIndex;
+    
+    if (index < num_total)	{
+        Output[index] = Input1[index] - Input2[index];
+    }
+}
+
+__global__ void FGPResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total)
+{
+	int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    int index = (N*M)*k + i + N*j;
+    
+    if (index < num_total)	{
+        Output[index] = Input1[index] - Input2[index];
+    }
+}
+
 /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
 
 ////////////MAIN HOST FUNCTION ///////////////
@@ -418,7 +431,7 @@ extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, in
         
             if (epsil != 0.0f) {
                 /* calculate norm - stopping rules using the Thrust library */
-                ResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize);
+                FGPResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize);
                 checkCudaErrors( cudaDeviceSynchronize() );
                 checkCudaErrors(cudaPeekAtLastError() );               
                 
@@ -431,16 +444,16 @@ extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, in
                 if (re < epsil)  count++;
                     if (count > 4) break;       
              
-                copy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize);
+                FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize);
                 checkCudaErrors( cudaDeviceSynchronize() );
                 checkCudaErrors(cudaPeekAtLastError() );                                              
             }                  
         
-            copy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize);
+            FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );
         
-            copy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize);
+            FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );       
  
@@ -526,15 +539,15 @@ extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, in
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );           
         
-            copy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize);
+            FGPcopy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );
         
-            copy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize);
+            FGPcopy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );   
             
-            copy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize);
+            FGPcopy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );      
  
diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.cu b/Core/regularisers_GPU/TV_SB_GPU_core.cu
new file mode 100755
index 0000000..0bc466f
--- /dev/null
+++ b/Core/regularisers_GPU/TV_SB_GPU_core.cu
@@ -0,0 +1,562 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ 
+
+#include "TV_SB_GPU_core.h"
+#include <thrust/device_vector.h>
+#include <thrust/transform_reduce.h>
+
+/* CUDA implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularisation parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. eplsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+* 6. nonneg: 'nonnegativity (0 is OFF by default) [OPTIONAL parameter]
+* 7. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
+*
+* Output:
+* 1. Filtered/regularized image
+*
+* This function is based on the Matlab's code and paper by
+* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
+*/
+
+// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
+#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
+
+inline void __checkCudaErrors(cudaError err, const char *file, const int line)
+{
+    if (cudaSuccess != err)
+    {
+        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
+                file, line, (int)err, cudaGetErrorString(err));
+        exit(EXIT_FAILURE);
+    }
+}
+
+#define BLKXSIZE2D 16
+#define BLKYSIZE2D 16
+
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+struct square { __host__ __device__ float operator()(float x) { return x * x; } };
+
+/************************************************/
+/*****************2D modules*********************/
+/************************************************/
+__global__ void gauss_seidel2D_kernel(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, float lambda, float mu, float normConst, int N, int M, int ImSize)
+{
+    
+    float sum;
+    int i1,i2,j1,j2;
+     
+    //calculate each thread global index
+    const int i=blockIdx.x*blockDim.x+threadIdx.x;
+    const int j=blockIdx.y*blockDim.y+threadIdx.y;
+    
+    int index = j*N+i;
+    
+    if ((i < N) && (j < M)) {
+        i1 = i+1; if (i1 == N) i1 = i-1;
+        i2 = i-1; if (i2 < 0) i2 = i+1;
+        j1 = j+1; if (j1 == M) j1 = j-1;
+        j2 = j-1; if (j2 < 0) j2 = j+1;
+        
+        sum = Dx[j*N+i2] - Dx[index] + Dy[j2*N+i] - Dy[index] - Bx[j*N+i2] + Bx[index] - By[j2*N+i] + By[index];
+        sum += U_prev[j*N+i1] + U_prev[j*N+i2] + U_prev[j1*N+i] + U_prev[j2*N+i];
+        sum *= lambda;
+        sum += mu*A[index];
+        U[index] = normConst*sum; //Write final result to global memory
+    }
+    return;
+}
+__global__ void updDxDy_shrinkAniso2D_kernel(float *U, float *Dx, float *Dy, float *Bx, float *By, float lambda, int N, int M, int ImSize)
+{
+    
+    int i1,j1;
+    float val1, val11, val2, val22, denom_lam;
+    denom_lam = 1.0f/lambda;
+     
+    //calculate each thread global index
+    const int i=blockIdx.x*blockDim.x+threadIdx.x;
+    const int j=blockIdx.y*blockDim.y+threadIdx.y;
+    
+    int index = j*N+i;
+    
+    if ((i < N) && (j < M)) {
+        i1 = i+1; if (i1 == N) i1 = i-1;
+        j1 = j+1; if (j1 == M) j1 = j-1;
+                
+            val1 = (U[j*N+i1] - U[index]) + Bx[index];
+            val2 = (U[j1*N+i] - U[index]) + By[index];
+            
+            val11 = abs(val1) - denom_lam; if (val11 < 0) val11 = 0;
+            val22 = abs(val2) - denom_lam; if (val22 < 0) val22 = 0;
+            
+            if (val1 !=0) Dx[index] = (val1/abs(val1))*val11; else Dx[index] = 0;
+            if (val2 !=0) Dy[index] = (val2/abs(val2))*val22; else Dy[index] = 0;
+    }
+    return;
+}
+
+__global__ void updDxDy_shrinkIso2D_kernel(float *U, float *Dx, float *Dy, float *Bx, float *By, float lambda, int N, int M, int ImSize)
+{
+    
+    int i1,j1;
+    float val1, val11, val2, denom_lam, denom;
+    denom_lam = 1.0f/lambda;
+     
+    //calculate each thread global index
+    const int i=blockIdx.x*blockDim.x+threadIdx.x;
+    const int j=blockIdx.y*blockDim.y+threadIdx.y;
+    
+    int index = j*N+i;
+    
+    if ((i < N) && (j < M)) {
+        i1 = i+1; if (i1 == N) i1 = i-1;
+        j1 = j+1; if (j1 == M) j1 = j-1;
+        
+            val1 = (U[j*N+i1] - U[index]) + Bx[index];
+            val2 = (U[j1*N+i] - U[index]) + By[index];
+            
+            denom = sqrt(val1*val1 + val2*val2);
+            
+            val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
+            
+            if (denom != 0.0f) {
+                Dx[index] = val11*(val1/denom);
+                Dy[index] = val11*(val2/denom);
+            }
+            else {
+                Dx[index] = 0;
+                Dy[index] = 0;
+            }
+    }
+    return;
+}
+
+__global__ void updBxBy2D_kernel(float *U, float *Dx, float *Dy, float *Bx, float *By, int N, int M, int ImSize)
+{    
+    int i1,j1;
+     
+    //calculate each thread global index
+    const int i=blockIdx.x*blockDim.x+threadIdx.x;
+    const int j=blockIdx.y*blockDim.y+threadIdx.y;
+    
+    int index = j*N+i;
+    
+    if ((i < N) && (j < M)) {
+            /* symmetric boundary conditions (Neuman) */
+            i1 = i+1; if (i1 == N) i1 = i-1;
+            j1 = j+1; if (j1 == M) j1 = j-1;
+            
+            Bx[index] += (U[j*N+i1] - U[index]) - Dx[index];
+            By[index] += (U[j1*N+i] - U[index]) - Dy[index];
+    }
+    return;
+}
+
+
+/************************************************/
+/*****************3D modules*********************/
+/************************************************/
+__global__ void gauss_seidel3D_kernel(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, float lambda, float mu, float normConst, int N, int M, int Z, int ImSize)
+{
+    
+    float sum,d_val,b_val;
+    int i1,i2,j1,j2,k1,k2;
+     
+    //calculate each thread global index
+	int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    int index = (N*M)*k + i + N*j;
+    
+    if ((i < N) && (j < M) && (k < Z)) {
+        i1 = i+1; if (i1 == N) i1 = i-1;
+        i2 = i-1; if (i2 < 0) i2 = i+1;
+        j1 = j+1; if (j1 == M) j1 = j-1;
+        j2 = j-1; if (j2 < 0) j2 = j+1;
+        k1 = k+1; if (k1 == Z) k1 = k-1;
+        k2 = k-1; if (k2 < 0) k2 = k+1;
+        
+        d_val = Dx[(N*M)*k + j*N+i2] - Dx[index] + Dy[(N*M)*k + j2*N+i] - Dy[index] + Dz[(N*M)*k2 + j*N+i] - Dz[index];
+        b_val = -Bx[(N*M)*k + j*N+i2] + Bx[index] - By[(N*M)*k + j2*N+i] + By[index] - Bz[(N*M)*k2 + j*N+i] + Bz[index];
+        sum = d_val + b_val;
+        sum += U_prev[(N*M)*k + j*N+i1] + U_prev[(N*M)*k + j*N+i2] + U_prev[(N*M)*k + j1*N+i] + U_prev[(N*M)*k + j2*N+i] + U_prev[(N*M)*k1 + j*N+i] + U_prev[(N*M)*k2 + j*N+i];
+        sum *= lambda;
+        sum += mu*A[index];
+        U[index] = normConst*sum;
+    }
+    return;
+}
+__global__ void updDxDy_shrinkAniso3D_kernel(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, float lambda, int N, int M, int Z, int ImSize)
+{
+    
+    int i1,j1,k1;
+    float val1, val11, val2, val3, val22, val33, denom_lam;
+    denom_lam = 1.0f/lambda;
+     
+    //calculate each thread global index
+	int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    int index = (N*M)*k + i + N*j;
+    
+    if ((i < N) && (j < M) && (k < Z)) {
+        i1 = i+1; if (i1 == N) i1 = i-1;
+        j1 = j+1; if (j1 == M) j1 = j-1;
+        k1 = k+1; if (k1 == Z) k1 = k-1;
+                
+            val1 = (U[(N*M)*k + i1 + N*j] - U[index]) + Bx[index];
+            val2 = (U[(N*M)*k + i + N*j1] - U[index]) + By[index];
+            val3 = (U[(N*M)*k1 + i + N*j] - U[index]) + Bz[index];
+            
+            val11 = abs(val1) - denom_lam; if (val11 < 0.0f) val11 = 0.0f;
+            val22 = abs(val2) - denom_lam; if (val22 < 0.0f) val22 = 0.0f;
+            val33 = abs(val3) - denom_lam; if (val33 < 0.0f) val33 = 0.0f;
+            
+            if (val1 !=0.0f) Dx[index] = (val1/abs(val1))*val11; else Dx[index] = 0.0f;
+            if (val2 !=0.0f) Dy[index] = (val2/abs(val2))*val22; else Dy[index] = 0.0f;
+            if (val3 !=0.0f) Dz[index] = (val3/abs(val3))*val33; else Dz[index] = 0.0f;
+    }
+    return;
+}
+
+__global__ void updDxDy_shrinkIso3D_kernel(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, float lambda, int N, int M, int Z, int ImSize)
+{
+    
+    int i1,j1,k1;
+    float val1, val11, val2, val3, denom_lam, denom;
+    denom_lam = 1.0f/lambda;
+     
+    //calculate each thread global index
+	int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    int index = (N*M)*k + i + N*j;
+    
+    if ((i < N) && (j < M) && (k < Z)) {
+        i1 = i+1; if (i1 == N) i1 = i-1;
+        j1 = j+1; if (j1 == M) j1 = j-1;
+        k1 = k+1; if (k1 == Z) k1 = k-1;
+        
+            val1 = (U[(N*M)*k + i1 + N*j] - U[index]) + Bx[index];
+            val2 = (U[(N*M)*k + i + N*j1] - U[index]) + By[index];
+            val3 = (U[(N*M)*k1 + i + N*j] - U[index]) + Bz[index];
+            
+            denom = sqrt(val1*val1 + val2*val2 + val3*val3);
+            
+            val11 = (denom - denom_lam); if (val11 < 0.0f) val11 = 0.0f;
+            
+            if (denom != 0.0f) {
+                Dx[index] = val11*(val1/denom);
+                Dy[index] = val11*(val2/denom);
+                Dz[index] = val11*(val3/denom);
+            }
+            else {
+                Dx[index] = 0.0f;
+                Dy[index] = 0.0f;
+                Dz[index] = 0.0f;
+            }
+    }
+    return;
+}
+
+__global__ void updBxBy3D_kernel(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int N, int M, int Z, int ImSize)
+{    
+    int i1,j1,k1;
+     
+    //calculate each thread global index
+	int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    int index = (N*M)*k + i + N*j;
+    
+    if ((i < N) && (j < M) && (k < Z)) {
+            /* symmetric boundary conditions (Neuman) */
+            i1 = i+1; if (i1 == N) i1 = i-1;
+            j1 = j+1; if (j1 == M) j1 = j-1;
+            k1 = k+1; if (k1 == Z) k1 = k-1;
+            
+            Bx[index] += (U[(N*M)*k + i1 + N*j] - U[index]) - Dx[index];
+            By[index] += (U[(N*M)*k + i + N*j1] - U[index]) - Dy[index];
+            Bz[index] += (U[(N*M)*k1 + i + N*j] - U[index]) - Dz[index];
+    }
+    return;
+}
+
+__global__ void SBcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
+{
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    
+    int index = xIndex + N*yIndex;
+    
+    if (index < num_total)	{
+        Output[index] = Input[index];
+    }
+}
+
+__global__ void SBcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
+{
+	int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    int index = (N*M)*k + i + N*j;
+    
+    if (index < num_total)	{
+        Output[index] = Input[index];
+    }
+}
+
+__global__ void SBResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
+{
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    
+    int index = xIndex + N*yIndex;
+    
+    if (index < num_total)	{
+        Output[index] = Input1[index] - Input2[index];
+    }
+}
+
+__global__ void SBResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total)
+{
+	int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    int index = (N*M)*k + i + N*j;
+    
+    if (index < num_total)	{
+        Output[index] = Input1[index] - Input2[index];
+    }
+}
+
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+/********************* MAIN HOST FUNCTION ******************/
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ)
+{
+    int deviceCount = -1; // number of devices
+    cudaGetDeviceCount(&deviceCount);
+    if (deviceCount == 0) {
+        fprintf(stderr, "No CUDA devices found\n");
+        return;
+    }
+    
+	int ll, DimTotal;
+	float re, lambda, normConst;
+    int count = 0;
+    mu = 1.0f/mu;
+	lambda = 2.0f*mu;
+
+    if (dimZ <= 1) {
+		/*2D verson*/
+		DimTotal = dimX*dimY;
+		normConst = 1.0f/(mu + 4.0f*lambda);
+		float *d_input, *d_update, *d_res, *d_update_prev=NULL, *Dx=NULL, *Dy=NULL, *Bx=NULL, *By=NULL;
+   
+		dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+		dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
+    
+		/*allocate space for images on device*/
+		checkCudaErrors( cudaMalloc((void**)&d_input,DimTotal*sizeof(float)) );
+		checkCudaErrors( cudaMalloc((void**)&d_update,DimTotal*sizeof(float)) );
+		checkCudaErrors( cudaMalloc((void**)&d_update_prev,DimTotal*sizeof(float)) );
+		if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_res,DimTotal*sizeof(float)) );
+		checkCudaErrors( cudaMalloc((void**)&Dx,DimTotal*sizeof(float)) );
+		checkCudaErrors( cudaMalloc((void**)&Dy,DimTotal*sizeof(float)) );
+		checkCudaErrors( cudaMalloc((void**)&Bx,DimTotal*sizeof(float)) );
+		checkCudaErrors( cudaMalloc((void**)&By,DimTotal*sizeof(float)) );
+    
+        checkCudaErrors( cudaMemcpy(d_input,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemcpy(d_update,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
+        cudaMemset(Dx, 0, DimTotal*sizeof(float));
+        cudaMemset(Dy, 0, DimTotal*sizeof(float));
+        cudaMemset(Bx, 0, DimTotal*sizeof(float));
+        cudaMemset(By, 0, DimTotal*sizeof(float));
+
+        /********************** Run CUDA 2D kernels here ********************/   
+        /* The main kernel */
+        for (ll = 0; ll < iter; ll++) {
+        
+        /* storing old value */
+        SBcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, DimTotal);
+        checkCudaErrors( cudaDeviceSynchronize() );
+        checkCudaErrors(cudaPeekAtLastError() );  
+
+		 /* perform two GS iterations (normally 2 is enough for the convergence) */
+        gauss_seidel2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Bx, By, lambda, mu, normConst, dimX, dimY, DimTotal);
+        checkCudaErrors( cudaDeviceSynchronize() );
+        checkCudaErrors(cudaPeekAtLastError() ); 
+        SBcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, DimTotal);
+        checkCudaErrors( cudaDeviceSynchronize() );
+        checkCudaErrors(cudaPeekAtLastError() );  
+        /* 2nd GS iteration */
+        gauss_seidel2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Bx, By, lambda, mu, normConst, dimX, dimY, DimTotal);
+        checkCudaErrors( cudaDeviceSynchronize() );
+        checkCudaErrors(cudaPeekAtLastError() ); 
+        
+        /* TV-related step */
+          if (methodTV == 1)  updDxDy_shrinkAniso2D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Bx, By, lambda, dimX, dimY, DimTotal);
+          else updDxDy_shrinkIso2D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Bx, By, lambda, dimX, dimY, DimTotal);
+            
+        /* update for Bregman variables */
+        updBxBy2D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Bx, By, dimX, dimY, DimTotal);
+        checkCudaErrors( cudaDeviceSynchronize() );
+        checkCudaErrors(cudaPeekAtLastError() ); 
+        
+          if (epsil != 0.0f) {
+                /* calculate norm - stopping rules using the Thrust library */
+                SBResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, d_res, dimX, dimY, DimTotal);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );               
+                
+                thrust::device_vector<float> d_vec(d_res, d_res + DimTotal);
+                float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));		
+                thrust::device_vector<float> d_vec2(d_update, d_update + DimTotal);  		
+                float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
+                    
+                re = (reduction/reduction2);      
+                if (re < epsil)  count++;
+                    if (count > 4) break;
+          }
+        
+        }
+        if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll);   
+            /***************************************************************/    
+            //copy result matrix from device to host memory
+            cudaMemcpy(Output,d_update,DimTotal*sizeof(float),cudaMemcpyDeviceToHost);
+    
+            cudaFree(d_input);
+            cudaFree(d_update);
+            cudaFree(d_update_prev);
+            if (epsil != 0.0f) cudaFree(d_res);
+            cudaFree(Dx);
+            cudaFree(Dy);
+            cudaFree(Bx);
+            cudaFree(By);
+    }
+    else {
+		/*3D verson*/
+		DimTotal = dimX*dimY*dimZ;
+		normConst = 1.0f/(mu + 6.0f*lambda);
+		float *d_input, *d_update, *d_res, *d_update_prev=NULL, *Dx=NULL, *Dy=NULL, *Dz=NULL, *Bx=NULL, *By=NULL, *Bz=NULL;
+   
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE));
+    
+		/*allocate space for images on device*/
+		checkCudaErrors( cudaMalloc((void**)&d_input,DimTotal*sizeof(float)) );
+		checkCudaErrors( cudaMalloc((void**)&d_update,DimTotal*sizeof(float)) );
+		checkCudaErrors( cudaMalloc((void**)&d_update_prev,DimTotal*sizeof(float)) );
+		if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_res,DimTotal*sizeof(float)) );
+		checkCudaErrors( cudaMalloc((void**)&Dx,DimTotal*sizeof(float)) );
+		checkCudaErrors( cudaMalloc((void**)&Dy,DimTotal*sizeof(float)) );
+		checkCudaErrors( cudaMalloc((void**)&Dz,DimTotal*sizeof(float)) );
+		checkCudaErrors( cudaMalloc((void**)&Bx,DimTotal*sizeof(float)) );
+		checkCudaErrors( cudaMalloc((void**)&By,DimTotal*sizeof(float)) );
+		checkCudaErrors( cudaMalloc((void**)&Bz,DimTotal*sizeof(float)) );
+    
+        checkCudaErrors( cudaMemcpy(d_input,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemcpy(d_update,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
+        cudaMemset(Dx, 0, DimTotal*sizeof(float));
+        cudaMemset(Dy, 0, DimTotal*sizeof(float));
+        cudaMemset(Dz, 0, DimTotal*sizeof(float));
+        cudaMemset(Bx, 0, DimTotal*sizeof(float));
+        cudaMemset(By, 0, DimTotal*sizeof(float));
+        cudaMemset(Bz, 0, DimTotal*sizeof(float));
+
+        /********************** Run CUDA 3D kernels here ********************/   
+        /* The main kernel */
+        for (ll = 0; ll < iter; ll++) {
+        
+        /* storing old value */
+        SBcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, DimTotal);
+        checkCudaErrors( cudaDeviceSynchronize() );
+        checkCudaErrors(cudaPeekAtLastError() );
+
+		 /* perform two GS iterations (normally 2 is enough for the convergence) */
+        gauss_seidel3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Dz, Bx, By, Bz, lambda, mu, normConst, dimX, dimY, dimZ, DimTotal);
+        checkCudaErrors( cudaDeviceSynchronize() );
+        checkCudaErrors(cudaPeekAtLastError() ); 
+        SBcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, DimTotal);
+        checkCudaErrors( cudaDeviceSynchronize() );
+        checkCudaErrors(cudaPeekAtLastError() );  
+        /* 2nd GS iteration */
+        gauss_seidel3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Dz, Bx, By, Bz, lambda, mu, normConst, dimX, dimY, dimZ, DimTotal);
+        checkCudaErrors( cudaDeviceSynchronize() );
+        checkCudaErrors(cudaPeekAtLastError() ); 
+        
+        /* TV-related step */
+          if (methodTV == 1)  updDxDy_shrinkAniso3D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Dz, Bx, By, Bz, lambda, dimX, dimY, dimZ, DimTotal);
+          else updDxDy_shrinkIso3D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Dz, Bx, By, Bz, lambda, dimX, dimY, dimZ, DimTotal);
+            
+        /* update for Bregman variables */
+        updBxBy3D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, DimTotal);
+        checkCudaErrors( cudaDeviceSynchronize() );
+        checkCudaErrors(cudaPeekAtLastError() ); 
+        
+          if (epsil != 0.0f) {
+                /* calculate norm - stopping rules using the Thrust library */
+                SBResidCalc3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, d_res, dimX, dimY, dimZ, DimTotal);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );               
+                
+                thrust::device_vector<float> d_vec(d_res, d_res + DimTotal);
+                float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));		
+                thrust::device_vector<float> d_vec2(d_update, d_update + DimTotal);  		
+                float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
+                    
+                re = (reduction/reduction2);
+                if (re < epsil)  count++;
+                    if (count > 4) break;
+          }
+        }
+        if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll);   
+            /***************************************************************/    
+            //copy result matrix from device to host memory
+            cudaMemcpy(Output,d_update,DimTotal*sizeof(float),cudaMemcpyDeviceToHost);
+    
+            cudaFree(d_input);
+            cudaFree(d_update);
+            cudaFree(d_update_prev);
+            if (epsil != 0.0f) cudaFree(d_res);
+            cudaFree(Dx);
+            cudaFree(Dy);
+            cudaFree(Dz);
+            cudaFree(Bx);
+            cudaFree(By);
+            cudaFree(Bz);
+    } 
+    cudaDeviceReset(); 
+}
diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.h b/Core/regularisers_GPU/TV_SB_GPU_core.h
new file mode 100755
index 0000000..bdc9219
--- /dev/null
+++ b/Core/regularisers_GPU/TV_SB_GPU_core.h
@@ -0,0 +1,10 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+
+#ifndef _SB_TV_GPU_
+#define _SB_TV_GPU_
+
+extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
+
+#endif 
diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
index 04047a5..d4699f9 100644
--- a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
+++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
@@ -240,6 +240,20 @@ __global__ void dTVcopy_kernel2D(float *Input, float* Output, int N, int M, int
         Output[index] = Input[index];
     }
 }
+
+__global__ void dTVcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
+{
+	int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    int index = (N*M)*k + i + N*j;
+    
+    if (index < num_total)	{
+        Output[index] = Input[index];
+    }
+}
+
 __global__ void dTVResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
 {
     int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
@@ -250,7 +264,21 @@ __global__ void dTVResidCalc2D_kernel(float *Input1, float *Input2, float* Outpu
     if (index < num_total)	{
         Output[index] = Input1[index] - Input2[index];
     }
-}   
+}
+
+__global__ void dTVResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total)
+{
+	int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    int index = (N*M)*k + i + N*j;
+    
+    if (index < num_total)	{
+        Output[index] = Input1[index] - Input2[index];
+    }
+}
+
 /************************************************/
 /*****************3D modules*********************/
 /************************************************/
@@ -437,32 +465,6 @@ __global__ void dTVnonneg3D_kernel(float* Output, int N, int M, int Z, int num_t
         if (Output[index] < 0.0f) Output[index] = 0.0f;
     }
 }
-
-__global__ void dTVcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
-{
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if (index < num_total)	{
-        Output[index] = Input[index];
-    }
-}
-
-__global__ void dTVResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total)
-{
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if (index < num_total)	{
-        Output[index] = Input1[index] - Input2[index];
-    }
-}
 /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
 
 ////////////MAIN HOST FUNCTION ///////////////
diff --git a/Readme.md b/Readme.md
index 31d03a1..a260bb3 100644
--- a/Readme.md
+++ b/Readme.md
@@ -14,11 +14,12 @@ can also be used as image denoising iterative filters. The core modules are writ
 ## Package modules (regularisers):
 
 ### Single-channel
-1. Rudin-Osher-Fatemi (ROF) Total Variation (explicit PDE minimisation scheme) [2D/3D GPU/CPU]; (Ref. 1)
-2. Fast-Gradient-Projection (FGP) Total Variation [2D/3D GPU/CPU]; (Ref. 2)
+1. Rudin-Osher-Fatemi (ROF) Total Variation (explicit PDE minimisation scheme) [2D/3D CPU/GPU]; (Ref. 1)
+2. Fast-Gradient-Projection (FGP) Total Variation [2D/3D CPU/GPU]; (Ref. 2)
+3. Split-Bregman (SB) Total Variation [2D/3D CPU/GPU]; (Ref. 4)
 
 ### Multi-channel
-1. Fast-Gradient-Projection (FGP) Directional Total Variation [2D/3D GPU/CPU]; (Ref. 4,2)
+1. Fast-Gradient-Projection (FGP) Directional Total Variation [2D/3D CPU/GPU]; (Ref. 3,2)
 
 ## Installation:
 
@@ -43,12 +44,11 @@ can also be used as image denoising iterative filters. The core modules are writ
 ### References:
 1. Rudin, L.I., Osher, S. and Fatemi, E., 1992. Nonlinear total variation based noise removal algorithms. Physica D: nonlinear phenomena, 60(1-4), pp.259-268.
 2. Beck, A. and Teboulle, M., 2009. Fast gradient-based algorithms for constrained total variation image denoising and deblurring problems. IEEE Transactions on Image Processing, 18(11), pp.2419-2434.
-3. Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
-4. Ehrhardt, M.J. and Betcke, M.M., 2016. Multicontrast MRI reconstruction with structure-guided total variation. SIAM Journal on Imaging Sciences, 9(3), pp.1084-1106.
+3. Ehrhardt, M.J. and Betcke, M.M., 2016. Multicontrast MRI reconstruction with structure-guided total variation. SIAM Journal on Imaging Sciences, 9(3), pp.1084-1106.
+4. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
 
 ### License:
 [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
 
 ### Acknowledgments:
 CCPi-RGL software is a product of the [CCPi](https://www.ccpi.ac.uk/) group and STFC SCD software developers. Any relevant questions/comments can be e-mailed to Daniil Kazantsev at dkazanc@hotmail.com
-
diff --git a/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m b/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m
index dc49d9c..fb55097 100644
--- a/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m
+++ b/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m
@@ -14,35 +14,47 @@ end
 vol3D(vol3D < 0) = 0;
 figure; imshow(vol3D(:,:,15), [0 1]); title('Noisy image');
 
+
+lambda_reg = 0.03; % regularsation parameter for all methods
 %%
 fprintf('Denoise a volume using the ROF-TV model (CPU) \n');
-lambda_rof = 0.03; % regularisation parameter
 tau_rof = 0.0025; % time-marching constant 
 iter_rof = 300; % number of ROF iterations
-tic; u_rof = ROF_TV(single(vol3D), lambda_rof, iter_rof, tau_rof); toc; 
+tic; u_rof = ROF_TV(single(vol3D), lambda_reg, iter_rof, tau_rof); toc; 
 figure; imshow(u_rof(:,:,15), [0 1]); title('ROF-TV denoised volume (CPU)');
 %%
 % fprintf('Denoise a volume using the ROF-TV model (GPU) \n');
-% lambda_rof = 0.03; % regularisation parameter
 % tau_rof = 0.0025; % time-marching constant 
 % iter_rof = 300; % number of ROF iterations
-% tic; u_rofG = ROF_TV_GPU(single(vol3D), lambda_rof, iter_rof, tau_rof); toc;
+% tic; u_rofG = ROF_TV_GPU(single(vol3D), lambda_reg, iter_rof, tau_rof); toc;
 % figure; imshow(u_rofG(:,:,15), [0 1]); title('ROF-TV denoised volume (GPU)');
 %%
 fprintf('Denoise a volume using the FGP-TV model (CPU) \n');
-lambda_fgp = 0.03; % regularisation parameter
 iter_fgp = 300; % number of FGP iterations
 epsil_tol =  1.0e-05; % tolerance
-tic; u_fgp = FGP_TV(single(vol3D), lambda_fgp, iter_fgp, epsil_tol); toc; 
+tic; u_fgp = FGP_TV(single(vol3D), lambda_reg, iter_fgp, epsil_tol); toc; 
 figure; imshow(u_fgp(:,:,15), [0 1]); title('FGP-TV denoised volume (CPU)');
 %%
 % fprintf('Denoise a volume using the FGP-TV model (GPU) \n');
-% lambda_fgp = 0.03; % regularisation parameter
 % iter_fgp = 300; % number of FGP iterations
 % epsil_tol =  1.0e-05; % tolerance
-% tic; u_fgpG = FGP_TV_GPU(single(vol3D), lambda_fgp, iter_fgp, epsil_tol); toc; 
+% tic; u_fgpG = FGP_TV_GPU(single(vol3D), lambda_reg, iter_fgp, epsil_tol); toc; 
 % figure; imshow(u_fgpG(:,:,15), [0 1]); title('FGP-TV denoised volume (GPU)');
 %%
+fprintf('Denoise a volume using the SB-TV model (CPU) \n');
+iter_sb = 150; % number of SB iterations
+epsil_tol =  1.0e-05; % tolerance
+tic; u_sb = SB_TV(single(vol3D), lambda_reg, iter_sb, epsil_tol); toc; 
+figure; imshow(u_sb(:,:,15), [0 1]); title('SB-TV denoised volume (CPU)');
+%%
+% fprintf('Denoise a volume using the SB-TV model (GPU) \n');
+% iter_sb = 150; % number of SB iterations
+% epsil_tol =  1.0e-05; % tolerance
+% tic; u_sbG = SB_TV_GPU(single(vol3D), lambda_reg, iter_sb, epsil_tol); toc; 
+% figure; imshow(u_sbG(:,:,15), [0 1]); title('SB-TV denoised volume (GPU)');
+%%
+
+%>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< %
 fprintf('Denoise a volume using the FGP-dTV model (CPU) \n');
 
 % create another volume (reference) with slightly less amount of noise
@@ -53,11 +65,10 @@ end
 vol3D_ref(vol3D_ref < 0) = 0;
 % vol3D_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
 
-lambda_fgp = 0.03; % regularisation parameter
 iter_fgp = 300; % number of FGP iterations
 epsil_tol =  1.0e-05; % tolerance
 eta =  0.2; % Reference image gradient smoothing constant
-tic; u_fgp_dtv = FGP_dTV(single(vol3D), single(vol3D_ref), lambda_fgp, iter_fgp, epsil_tol, eta); toc; 
+tic; u_fgp_dtv = FGP_dTV(single(vol3D), single(vol3D_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
 figure; imshow(u_fgp_dtv(:,:,15), [0 1]); title('FGP-dTV denoised volume (CPU)');
 %%
 fprintf('Denoise a volume using the FGP-dTV model (GPU) \n');
@@ -70,10 +81,9 @@ end
 vol3D_ref(vol3D_ref < 0) = 0;
 % vol3D_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
 
-lambda_fgp = 0.03; % regularisation parameter
 iter_fgp = 300; % number of FGP iterations
 epsil_tol =  1.0e-05; % tolerance
 eta =  0.2; % Reference image gradient smoothing constant
-tic; u_fgp_dtv_g = FGP_dTV_GPU(single(vol3D), single(vol3D_ref), lambda_fgp, iter_fgp, epsil_tol, eta); toc; 
+tic; u_fgp_dtv_g = FGP_dTV_GPU(single(vol3D), single(vol3D_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
 figure; imshow(u_fgp_dtv_g(:,:,15), [0 1]); title('FGP-dTV denoised volume (GPU)');
 %%
 \ No newline at end of file
diff --git a/Wrappers/Matlab/demos/demoMatlab_denoise.m b/Wrappers/Matlab/demos/demoMatlab_denoise.m
index 145f2ff..129bedc 100644
--- a/Wrappers/Matlab/demos/demoMatlab_denoise.m
+++ b/Wrappers/Matlab/demos/demoMatlab_denoise.m
@@ -8,45 +8,55 @@ Im = double(imread('lena_gray_512.tif'))/255;  % loading image
 u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
 figure; imshow(u0, [0 1]); title('Noisy image');
 
+lambda_reg = 0.03; % regularsation parameter for all methods
 %%
 fprintf('Denoise using the ROF-TV model (CPU) \n');
-lambda_rof = 0.03; % regularisation parameter
 tau_rof = 0.0025; % time-marching constant 
 iter_rof = 2000; % number of ROF iterations
-tic; u_rof = ROF_TV(single(u0), lambda_rof, iter_rof, tau_rof); toc; 
+tic; u_rof = ROF_TV(single(u0), lambda_reg, iter_rof, tau_rof); toc; 
 figure; imshow(u_rof, [0 1]); title('ROF-TV denoised image (CPU)');
 %%
 % fprintf('Denoise using the ROF-TV model (GPU) \n');
-% lambda_rof = 0.03; % regularisation parameter
 % tau_rof = 0.0025; % time-marching constant 
 % iter_rof = 2000; % number of ROF iterations
-% tic; u_rofG = ROF_TV_GPU(single(u0), lambda_rof, iter_rof, tau_rof); toc;
+% tic; u_rofG = ROF_TV_GPU(single(u0), lambda_reg, iter_rof, tau_rof); toc;
 % figure; imshow(u_rofG, [0 1]); title('ROF-TV denoised image (GPU)');
 %%
 fprintf('Denoise using the FGP-TV model (CPU) \n');
-lambda_fgp = 0.03; % regularisation parameter
 iter_fgp = 1000; % number of FGP iterations
 epsil_tol =  1.0e-06; % tolerance
-tic; u_fgp = FGP_TV(single(u0), lambda_fgp, iter_fgp, epsil_tol); toc; 
+tic; u_fgp = FGP_TV(single(u0), lambda_reg, iter_fgp, epsil_tol); toc; 
 figure; imshow(u_fgp, [0 1]); title('FGP-TV denoised image (CPU)');
 %%
 % fprintf('Denoise using the FGP-TV model (GPU) \n');
-% lambda_fgp = 0.03; % regularisation parameter
 % iter_fgp = 1000; % number of FGP iterations
 % epsil_tol =  1.0e-05; % tolerance
-% tic; u_fgpG = FGP_TV_GPU(single(u0), lambda_fgp, iter_fgp, epsil_tol); toc; 
+% tic; u_fgpG = FGP_TV_GPU(single(u0), lambda_reg, iter_fgp, epsil_tol); toc; 
 % figure; imshow(u_fgpG, [0 1]); title('FGP-TV denoised image (GPU)');
 %%
+fprintf('Denoise using the SB-TV model (CPU) \n');
+iter_sb = 150; % number of SB iterations
+epsil_tol =  1.0e-06; % tolerance
+tic; u_sb = SB_TV(single(u0), lambda_reg, iter_sb, epsil_tol); toc; 
+figure; imshow(u_sb, [0 1]); title('SB-TV denoised image (CPU)');
+%%
+% fprintf('Denoise using the SB-TV model (GPU) \n');
+% iter_sb = 150; % number of SB iterations
+% epsil_tol =  1.0e-06; % tolerance
+% tic; u_sbG = SB_TV_GPU(single(u0), lambda_reg, iter_sb, epsil_tol); toc; 
+% figure; imshow(u_sbG, [0 1]); title('SB-TV denoised image (GPU)');
+%%
+%>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< %
+
 fprintf('Denoise using the FGP-dTV model (CPU) \n');
 % create another image (reference) with slightly less amount of noise
 u_ref = Im + .01*randn(size(Im)); u_ref(u_ref < 0) = 0;
 % u_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
 
-lambda_fgp = 0.03; % regularisation parameter
 iter_fgp = 1000; % number of FGP iterations
 epsil_tol =  1.0e-06; % tolerance
 eta =  0.2; % Reference image gradient smoothing constant
-tic; u_fgp_dtv = FGP_dTV(single(u0), single(u_ref), lambda_fgp, iter_fgp, epsil_tol, eta); toc; 
+tic; u_fgp_dtv = FGP_dTV(single(u0), single(u_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
 figure; imshow(u_fgp_dtv, [0 1]); title('FGP-dTV denoised image (CPU)');
 %%
 % fprintf('Denoise using the FGP-dTV model (GPU) \n');
@@ -54,10 +64,9 @@ figure; imshow(u_fgp_dtv, [0 1]); title('FGP-dTV denoised image (CPU)');
 % u_ref = Im + .01*randn(size(Im)); u_ref(u_ref < 0) = 0;
 % % u_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
 % 
-% lambda_fgp = 0.03; % regularisation parameter
 % iter_fgp = 1000; % number of FGP iterations
 % epsil_tol =  1.0e-06; % tolerance
 % eta =  0.2; % Reference image gradient smoothing constant
-% tic; u_fgp_dtvG = FGP_dTV_GPU(single(u0), single(u_ref), lambda_fgp, iter_fgp, epsil_tol, eta); toc; 
+% tic; u_fgp_dtvG = FGP_dTV_GPU(single(u0), single(u_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
 % figure; imshow(u_fgp_dtvG, [0 1]); title('FGP-dTV denoised image (GPU)');
 %%
diff --git a/Wrappers/Matlab/mex_compile/compileCPU_mex.m b/Wrappers/Matlab/mex_compile/compileCPU_mex.m
index 71f345a..c3c82ff 100644
--- a/Wrappers/Matlab/mex_compile/compileCPU_mex.m
+++ b/Wrappers/Matlab/mex_compile/compileCPU_mex.m
@@ -11,10 +11,13 @@ movefile ROF_TV.mex* ../installed/
 mex FGP_TV.c FGP_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 movefile FGP_TV.mex* ../installed/
 
+mex SB_TV.c SB_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile SB_TV.mex* ../installed/
+
 mex FGP_dTV.c FGP_dTV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 movefile FGP_dTV.mex* ../installed/
 
-delete ROF_TV_core* FGP_TV_core* FGP_dTV_core* utils* CCPiDefines.h
+delete SB_TV_core* ROF_TV_core* FGP_TV_core* FGP_dTV_core* utils* CCPiDefines.h
 
 fprintf('%s \n', 'All successfully compiled!');
 
diff --git a/Wrappers/Matlab/mex_compile/compileGPU_mex.m b/Wrappers/Matlab/mex_compile/compileGPU_mex.m
index f58e9bc..3dbeb8a 100644
--- a/Wrappers/Matlab/mex_compile/compileGPU_mex.m
+++ b/Wrappers/Matlab/mex_compile/compileGPU_mex.m
@@ -1,13 +1,13 @@
 % execute this mex file in Matlab once
 
-%>>>>>>>>>>>>>>Important<<<<<<<<<<<<<<<<<<<
+%>>>>>>>>>>>>>>>>>Important<<<<<<<<<<<<<<<<<<<
 % In order to compile CUDA modules one needs to have nvcc-compiler
-% installed (see CUDA SDK)
-% check it under MATLAB with !nvcc --version
-% In the code bellow we provide a full path to nvcc compiler 
+% installed (see CUDA SDK), check it under MATLAB with !nvcc --version
+
+% In the code bellow we provide a full explicit path to nvcc compiler 
 % ! paths to matlab and CUDA sdk can be different, modify accordingly !
 
-% tested on Ubuntu 16.04/MATLAB 2016b
+% tested on Ubuntu 16.04/MATLAB 2016b/cuda7.5/gcc4.9
 
 copyfile ../../../Core/regularisers_GPU/ regularisers_GPU/
 copyfile ../../../Core/CCPiDefines.h regularisers_GPU/
@@ -23,11 +23,15 @@ movefile ROF_TV_GPU.mex* ../installed/
 mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu FGP_TV_GPU.cpp TV_FGP_GPU_core.o
 movefile FGP_TV_GPU.mex* ../installed/
 
+!/usr/local/cuda/bin/nvcc -O0 -c TV_SB_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
+mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu SB_TV_GPU.cpp TV_SB_GPU_core.o
+movefile SB_TV_GPU.mex* ../installed/
+
 !/usr/local/cuda/bin/nvcc -O0 -c dTV_FGP_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
 mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu FGP_dTV_GPU.cpp dTV_FGP_GPU_core.o
 movefile FGP_dTV_GPU.mex* ../installed/
 
-delete TV_ROF_GPU_core* TV_FGP_GPU_core* dTV_FGP_GPU_core* CCPiDefines.h
+delete TV_ROF_GPU_core* TV_FGP_GPU_core* TV_SB_GPU_core* dTV_FGP_GPU_core* CCPiDefines.h
 fprintf('%s \n', 'All successfully compiled!');
 
 cd ../../
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/SB_TV.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/SB_TV.c
new file mode 100644
index 0000000..d284cac
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularisers_CPU/SB_TV.c
@@ -0,0 +1,89 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "SB_TV_core.h"
+
+/* C-OMP implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularisation parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. eplsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+* 6. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
+*
+* Output:
+* 1. Filtered/regularized image
+*
+* This function is based on the Matlab's code and paper by
+* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
+*/
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+        
+{
+    int number_of_dims, iter, dimX, dimY, dimZ, methTV, printswitch;
+    const int  *dim_array;
+    float *Input, *Output=NULL, lambda, epsil;
+    
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    
+    /*Handling Matlab input data*/
+    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter, Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), print switch");
+    
+    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter = 100; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    methTV = 0;  /* default isotropic TV penalty */
+    printswitch = 0; /*default print is switched, off - 0 */
+    
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    
+    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5) || (nrhs == 6))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
+    if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
+    if ((nrhs == 5) || (nrhs == 6))  {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    if (nrhs == 6)  {
+        printswitch = (int) mxGetScalar(prhs[5]);
+        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
+    }
+    
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
+    
+    if (number_of_dims == 2) {
+        dimZ = 1; /*2D case*/
+        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+    }
+    if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+    
+    /* running the function */
+    SB_TV_CPU_main(Input, Output, lambda, iter, epsil, methTV, printswitch, dimX, dimY, dimZ);
+}
diff --git a/Wrappers/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp b/Wrappers/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp
new file mode 100644
index 0000000..60847d9
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp
@@ -0,0 +1,89 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "TV_SB_GPU_core.h"
+
+/* CUDA mex-file for implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularisation parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. eplsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+* 6. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
+*
+* Output:
+* 1. Filtered/regularized image
+*
+* This function is based on the Matlab's code and paper by
+* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
+*/
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+        
+{
+    int number_of_dims, iter, dimX, dimY, dimZ, methTV, printswitch;
+    const int  *dim_array;
+    float *Input, *Output=NULL, lambda, epsil;
+    
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    
+    /*Handling Matlab input data*/
+    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter, Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), print switch");
+    
+    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter = 100; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    methTV = 0;  /* default isotropic TV penalty */
+    printswitch = 0; /*default print is switched, off - 0 */
+    
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    
+    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5) || (nrhs == 6))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
+    if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
+    if ((nrhs == 5) || (nrhs == 6))  {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    if (nrhs == 6)  {
+        printswitch = (int) mxGetScalar(prhs[5]);
+        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
+    }
+    
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
+    
+    if (number_of_dims == 2) {
+        dimZ = 1; /*2D case*/
+        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+    }
+    if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+    
+    /* running the function */
+    TV_SB_GPU_main(Input, Output, lambda, iter, epsil, methTV, printswitch, dimX, dimY, dimZ);
+}
diff --git a/Wrappers/Python/ccpi/filters/regularisers.py b/Wrappers/Python/ccpi/filters/regularisers.py
index 376cc9c..50c4374 100644
--- a/Wrappers/Python/ccpi/filters/regularisers.py
+++ b/Wrappers/Python/ccpi/filters/regularisers.py
@@ -2,8 +2,8 @@
 script which assigns a proper device core function based on a flag ('cpu' or 'gpu')
 """
 
-from ccpi.filters.cpu_regularisers_cython import TV_ROF_CPU, TV_FGP_CPU, dTV_FGP_CPU
-from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, dTV_FGP_GPU
+from ccpi.filters.cpu_regularisers_cython import TV_ROF_CPU, TV_FGP_CPU, TV_SB_CPU, dTV_FGP_CPU
+from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, TV_SB_GPU, dTV_FGP_GPU
 
 def ROF_TV(inputData, regularisation_parameter, iterations,
                      time_marching_parameter,device='cpu'):
@@ -42,6 +42,25 @@ def FGP_TV(inputData, regularisation_parameter,iterations,
     else:
         raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
                          .format(device))
+def SB_TV(inputData, regularisation_parameter, iterations,
+                     tolerance_param, methodTV, printM, device='cpu'):
+    if device == 'cpu':
+        return TV_SB_CPU(inputData,
+                     regularisation_parameter,
+                     iterations, 
+                     tolerance_param,
+                     methodTV,
+                     printM)
+    elif device == 'gpu':
+        return TV_SB_GPU(inputData,
+                     regularisation_parameter,
+                     iterations, 
+                     tolerance_param,
+                     methodTV,
+                     printM)
+    else:
+        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+                         .format(device))
 def FGP_dTV(inputData, refdata, regularisation_parameter, iterations,
                      tolerance_param, eta_const, methodTV, nonneg, printM, device='cpu'):
     if device == 'cpu':
diff --git a/Wrappers/Python/demos/demo_cpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_regularisers.py
index 00beb0b..0e4355b 100644
--- a/Wrappers/Python/demos/demo_cpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_cpu_regularisers.py
@@ -12,7 +12,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import os
 import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, FGP_dTV
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, FGP_dTV
 from qualitymetrics import rmse
 ###############################################################################
 def printParametersToString(pars):
@@ -141,13 +141,60 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
 imgplot = plt.imshow(fgp_cpu, cmap="gray")
 plt.title('{}'.format('CPU results'))
 
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________SB-TV (2D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure(3)
+plt.suptitle('Performance of SB-TV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : SB_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :150 ,\
+        'tolerance_constant':1e-06,\
+        'methodTV': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############SB TV CPU####################")
+start_time = timeit.default_timer()
+sb_cpu = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'cpu')  
+             
+             
+rms = rmse(Im, sb_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
 
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("_____________FGP-dTV (2D)__________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(3)
+fig = plt.figure(4)
 plt.suptitle('Performance of FGP-dTV regulariser using the CPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -223,7 +270,7 @@ print ("_______________ROF-TV (3D)_________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(4)
+fig = plt.figure(5)
 plt.suptitle('Performance of ROF-TV regulariser using the CPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy 15th slice of a volume')
@@ -263,7 +310,7 @@ print ("_______________FGP-TV (3D)__________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(5)
+fig = plt.figure(6)
 plt.suptitle('Performance of FGP-TV regulariser using the CPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -307,13 +354,59 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
 imgplot = plt.imshow(fgp_cpu3D[10,:,:], cmap="gray")
 plt.title('{}'.format('Recovered volume on the CPU using FGP-TV'))
 
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________SB-TV (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure(7)
+plt.suptitle('Performance of SB-TV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : SB_TV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :150 ,\
+        'tolerance_constant':0.00001,\
+        'methodTV': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############SB TV CPU####################")
+start_time = timeit.default_timer()
+sb_cpu3D = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'cpu')
+             
+rms = rmse(idealVol, sb_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using SB-TV'))
+
 
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("_______________FGP-dTV (3D)__________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(6)
+fig = plt.figure(8)
 plt.suptitle('Performance of FGP-dTV regulariser using the CPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
diff --git a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
index 310cf75..d8e2da7 100644
--- a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
@@ -12,7 +12,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import os
 import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, FGP_dTV
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, FGP_dTV
 from qualitymetrics import rmse
 ###############################################################################
 def printParametersToString(pars):
@@ -218,13 +218,99 @@ if (diff_im.sum() > 1):
 else:
     print ("Arrays match")
 
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________SB-TV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure(3)
+plt.suptitle('Comparison of SB-TV regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : SB_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :150 ,\
+        'tolerance_constant':1e-05,\
+        'methodTV': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############SB-TV CPU####################")
+start_time = timeit.default_timer()
+sb_cpu = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'cpu')  
+             
+             
+rms = rmse(Im, sb_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+
+print ("##############SB TV GPU##################")
+start_time = timeit.default_timer()
+sb_gpu = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'gpu')
+                                   
+rms = rmse(Im, sb_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = SB_TV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+diff_im = np.zeros(np.shape(rof_cpu))
+diff_im = abs(sb_cpu - sb_gpu)
+diff_im[diff_im > tolerance] = 1
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if (diff_im.sum() > 1):
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+
 
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("____________FGP-dTV bench___________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(3)
+fig = plt.figure(4)
 plt.suptitle('Comparison of FGP-dTV regulariser using CPU and GPU implementations')
 a=fig.add_subplot(1,4,1)
 a.set_title('Noisy Image')
diff --git a/Wrappers/Python/demos/demo_gpu_regularisers.py b/Wrappers/Python/demos/demo_gpu_regularisers.py
index 24a3c88..25d8d85 100644
--- a/Wrappers/Python/demos/demo_gpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_gpu_regularisers.py
@@ -12,7 +12,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import os
 import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, FGP_dTV
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, FGP_dTV
 from qualitymetrics import rmse
 ###############################################################################
 def printParametersToString(pars):
@@ -139,12 +139,59 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
 imgplot = plt.imshow(fgp_gpu, cmap="gray")
 plt.title('{}'.format('GPU results'))
 
+
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________FGP-dTV bench___________________")
+print ("____________SB-TV bench___________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
 fig = plt.figure(3)
+plt.suptitle('Performance of the SB-TV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : SB_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :150 ,\
+        'tolerance_constant':1e-06,\
+        'methodTV': 0 ,\
+        'printingOut': 0 
+        }
+
+print ("##############SB TV GPU##################")
+start_time = timeit.default_timer()
+sb_gpu = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'gpu')
+                                   
+rms = rmse(Im, sb_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = SB_TV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________FGP-dTV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure(4)
 plt.suptitle('Performance of the FGP-dTV regulariser using the GPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -219,7 +266,7 @@ print ("_______________ROF-TV (3D)_________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(4)
+fig = plt.figure(5)
 plt.suptitle('Performance of ROF-TV regulariser using the GPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy 15th slice of a volume')
@@ -259,7 +306,7 @@ print ("_______________FGP-TV (3D)__________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(5)
+fig = plt.figure(6)
 plt.suptitle('Performance of FGP-TV regulariser using the GPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -302,13 +349,58 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
 imgplot = plt.imshow(fgp_gpu3D[10,:,:], cmap="gray")
 plt.title('{}'.format('Recovered volume on the GPU using FGP-TV'))
 
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________SB-TV (3D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure(7)
+plt.suptitle('Performance of SB-TV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : SB_TV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :100 ,\
+        'tolerance_constant':1e-05,\
+        'methodTV': 0 ,\
+        'printingOut': 0 
+        }
+
+print ("#############SB TV GPU####################")
+start_time = timeit.default_timer()
+sb_gpu3D = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'gpu')
+
+rms = rmse(idealVol, sb_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using SB-TV'))
 
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("_______________FGP-dTV (3D)________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(6)
+fig = plt.figure(8)
 plt.suptitle('Performance of FGP-dTV regulariser using the GPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
diff --git a/Wrappers/Python/setup-regularisers.py.in b/Wrappers/Python/setup-regularisers.py.in
index c7ebb5c..0681cc4 100644
--- a/Wrappers/Python/setup-regularisers.py.in
+++ b/Wrappers/Python/setup-regularisers.py.in
@@ -36,6 +36,7 @@ extra_include_dirs += [os.path.join(".." , ".." , "Core"),
                        os.path.join(".." , ".." , "Core",  "regularisers_CPU"),
                        os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TV_FGP" ) , 
                        os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TV_ROF" ) , 
+                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TV_SB" ) ,
                        os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "dTV_FGP" ) , 
 						   "."]
 
diff --git a/Wrappers/Python/src/cpu_regularisers.pyx b/Wrappers/Python/src/cpu_regularisers.pyx
index 1661375..417670d 100644
--- a/Wrappers/Python/src/cpu_regularisers.pyx
+++ b/Wrappers/Python/src/cpu_regularisers.pyx
@@ -20,6 +20,7 @@ cimport numpy as np
 
 cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
 cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+cdef extern float SB_TV_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
 cdef extern float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
 
 
@@ -125,6 +126,63 @@ def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
                        printM,
                        dims[2], dims[1], dims[0])
     return outputData 
+
+#***************************************************************#
+#********************** Total-variation SB *********************#
+#***************************************************************#
+#*************** Total-variation Split Bregman (SB)*************#
+def TV_SB_CPU(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, printM):
+    if inputData.ndim == 2:
+        return TV_SB_2D(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, printM)
+    elif inputData.ndim == 3:
+        return TV_SB_3D(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, printM)
+
+def TV_SB_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     int iterationsNumb, 
+                     float tolerance_param,
+                     int methodTV,
+                     int printM):
+                         
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1]], dtype='float32')
+                   
+    #/* Run SB-TV iterations for 2D data */
+    SB_TV_CPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, 
+                       iterationsNumb, 
+                       tolerance_param,
+                       methodTV,
+                       printM,
+                       dims[0], dims[1], 1)
+    
+    return outputData        
+            
+def TV_SB_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     int iterationsNumb, 
+                     float tolerance_param,
+                     int methodTV,
+                     int printM):
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+    
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros([dims[0], dims[1], dims[2]], dtype='float32')
+           
+    #/* Run SB-TV iterations for 3D data */
+    SB_TV_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter,
+                       iterationsNumb, 
+                       tolerance_param,
+                       methodTV,
+                       printM,
+                       dims[2], dims[1], dims[0])
+    return outputData 
 #****************************************************************#
 #**************Directional Total-variation FGP ******************#
 #****************************************************************#
diff --git a/Wrappers/Python/src/gpu_regularisers.pyx b/Wrappers/Python/src/gpu_regularisers.pyx
index 18efdcd..36eec95 100644
--- a/Wrappers/Python/src/gpu_regularisers.pyx
+++ b/Wrappers/Python/src/gpu_regularisers.pyx
@@ -20,6 +20,7 @@ cimport numpy as np
 
 cdef extern void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
 cdef extern void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z);
+cdef extern void TV_SB_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int printM, int N, int M, int Z);
 cdef extern void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z);
 
 # Total-variation Rudin-Osher-Fatemi (ROF)
@@ -62,6 +63,27 @@ def TV_FGP_GPU(inputData,
                      methodTV,
                      nonneg,
                      printM)
+# Total-variation Split Bregman (SB)
+def TV_SB_GPU(inputData,
+                     regularisation_parameter,
+                     iterations, 
+                     tolerance_param,
+                     methodTV,
+                     printM):
+    if inputData.ndim == 2:
+        return SBTV2D(inputData,
+                     regularisation_parameter,
+                     iterations, 
+                     tolerance_param,
+                     methodTV,
+                     printM)
+    elif inputData.ndim == 3:
+        return SBTV3D(inputData,
+                     regularisation_parameter,
+                     iterations, 
+                     tolerance_param,
+                     methodTV,
+                     printM)
 # Directional Total-variation Fast-Gradient-Projection (FGP)
 def dTV_FGP_GPU(inputData,
                      refdata,
@@ -197,7 +219,60 @@ def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
                        dims[2], dims[1], dims[0]);   
      
     return outputData 
+#***************************************************************#
+#********************** Total-variation SB *********************#
+#***************************************************************#
+#*************** Total-variation Split Bregman (SB)*************#
+def SBTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     int iterations, 
+                     float tolerance_param,
+                     int methodTV,
+                     int printM):
+    
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+		    np.zeros([dims[0],dims[1]], dtype='float32')
+          
+    # Running CUDA code here    
+    TV_SB_GPU_main(&inputData[0,0], &outputData[0,0],                        
+                       regularisation_parameter, 
+                       iterations, 
+                       tolerance_param,
+                       methodTV,
+                       printM,
+                       dims[0], dims[1], 1);   
+     
+    return outputData
     
+def SBTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     int iterations, 
+                     float tolerance_param,
+                     int methodTV,
+                     int printM):
+    
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+          
+    # Running CUDA code here    
+    TV_SB_GPU_main(&inputData[0,0,0], &outputData[0,0,0], 
+                       regularisation_parameter , 
+                       iterations, 
+                       tolerance_param,
+                       methodTV,
+                       printM,
+                       dims[2], dims[1], dims[0]);
+     
+    return outputData 
 #****************************************************************#
 #**************Directional Total-variation FGP ******************#
 #****************************************************************#